diff --git "a/checkpoint-26750/trainer_state.json" "b/checkpoint-26750/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-26750/trainer_state.json" @@ -0,0 +1,241408 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.8346285920838605, + "global_step": 26750, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "compression_loss": 0.0, + "epoch": 0.0, + "label_loss": 0.2878532409667969, + "learning_rate": 4.3371403787769264e-09, + "loss": 0.3044, + "step": 1, + "teacher_loss": 0.30623510479927063 + }, + { + "compression_loss": 0.0, + "epoch": 0.0, + "label_loss": 0.49937474727630615, + "learning_rate": 8.674280757553853e-09, + "loss": 0.3654, + "step": 2, + "teacher_loss": 0.35050299763679504 + }, + { + "compression_loss": 0.0, + "epoch": 0.0, + "label_loss": 0.7952067852020264, + "learning_rate": 1.301142113633078e-08, + "loss": 0.4253, + "step": 3, + "teacher_loss": 0.38419413566589355 + }, + { + "compression_loss": 0.0, + "epoch": 0.0, + "label_loss": 0.752353310585022, + "learning_rate": 1.7348561515107706e-08, + "loss": 0.4879, + "step": 4, + "teacher_loss": 0.4584733247756958 + }, + { + "compression_loss": 0.0, + "epoch": 0.0, + "label_loss": 0.6486436724662781, + "learning_rate": 2.1685701893884633e-08, + "loss": 0.3889, + "step": 5, + "teacher_loss": 0.3600001633167267 + }, + { + "compression_loss": 0.0, + "epoch": 0.0, + "label_loss": 0.6064157485961914, + "learning_rate": 2.602284227266156e-08, + "loss": 0.3314, + "step": 6, + "teacher_loss": 0.3008692264556885 + }, + { + "compression_loss": 0.0, + "epoch": 0.0, + "label_loss": 0.603389322757721, + "learning_rate": 3.0359982651438484e-08, + "loss": 0.426, + "step": 7, + "teacher_loss": 0.40631771087646484 + }, + { + "compression_loss": 0.0, + "epoch": 0.0, + "label_loss": 0.47560954093933105, + "learning_rate": 3.469712303021541e-08, + "loss": 0.8501, + "step": 8, + "teacher_loss": 0.8917432427406311 + }, + { + "compression_loss": 0.0, + "epoch": 0.0, + "label_loss": 0.35841572284698486, + "learning_rate": 3.903426340899234e-08, + "loss": 0.3169, + "step": 9, + "teacher_loss": 0.3123272657394409 + }, + { + "compression_loss": 0.0, + "epoch": 0.0, + "label_loss": 0.4316421151161194, + "learning_rate": 4.3371403787769266e-08, + "loss": 0.5676, + "step": 10, + "teacher_loss": 0.5826940536499023 + }, + { + "compression_loss": 0.0, + "epoch": 0.0, + "label_loss": 0.5786064863204956, + "learning_rate": 4.770854416654619e-08, + "loss": 0.3257, + "step": 11, + "teacher_loss": 0.29761967062950134 + }, + { + "compression_loss": 0.0, + "epoch": 0.0, + "label_loss": 0.18219050765037537, + "learning_rate": 5.204568454532312e-08, + "loss": 0.2459, + "step": 12, + "teacher_loss": 0.25292351841926575 + }, + { + "compression_loss": 0.0, + "epoch": 0.0, + "label_loss": 0.1468430459499359, + "learning_rate": 5.638282492410005e-08, + "loss": 0.4292, + "step": 13, + "teacher_loss": 0.4606162905693054 + }, + { + "compression_loss": 0.0, + "epoch": 0.0, + "label_loss": 0.2759707570075989, + "learning_rate": 6.071996530287697e-08, + "loss": 0.2951, + "step": 14, + "teacher_loss": 0.29724395275115967 + }, + { + "compression_loss": 0.0, + "epoch": 0.0, + "label_loss": 0.615320086479187, + "learning_rate": 6.505710568165389e-08, + "loss": 0.4713, + "step": 15, + "teacher_loss": 0.4552942216396332 + }, + { + "compression_loss": 0.0, + "epoch": 0.0, + "label_loss": 0.6560722589492798, + "learning_rate": 6.939424606043082e-08, + "loss": 0.5152, + "step": 16, + "teacher_loss": 0.49955758452415466 + }, + { + "compression_loss": 0.0, + "epoch": 0.0, + "label_loss": 0.7656878232955933, + "learning_rate": 7.373138643920774e-08, + "loss": 0.4166, + "step": 17, + "teacher_loss": 0.3777827024459839 + }, + { + "compression_loss": 0.0, + "epoch": 0.0, + "label_loss": 0.3205189108848572, + "learning_rate": 7.806852681798468e-08, + "loss": 0.4887, + "step": 18, + "teacher_loss": 0.5074136853218079 + }, + { + "compression_loss": 0.0, + "epoch": 0.0, + "label_loss": 0.5319740772247314, + "learning_rate": 8.24056671967616e-08, + "loss": 0.4459, + "step": 19, + "teacher_loss": 0.43633657693862915 + }, + { + "compression_loss": 0.0, + "epoch": 0.0, + "label_loss": 0.7975113987922668, + "learning_rate": 8.674280757553853e-08, + "loss": 0.3598, + "step": 20, + "teacher_loss": 0.31118321418762207 + }, + { + "compression_loss": 0.0, + "epoch": 0.0, + "label_loss": 0.5031394362449646, + "learning_rate": 9.107994795431545e-08, + "loss": 0.2944, + "step": 21, + "teacher_loss": 0.2712216377258301 + }, + { + "compression_loss": 0.0, + "epoch": 0.0, + "label_loss": 0.327789843082428, + "learning_rate": 9.541708833309239e-08, + "loss": 0.2686, + "step": 22, + "teacher_loss": 0.26207101345062256 + }, + { + "compression_loss": 0.0, + "epoch": 0.0, + "label_loss": 0.7447530627250671, + "learning_rate": 9.975422871186931e-08, + "loss": 0.3677, + "step": 23, + "teacher_loss": 0.3258216381072998 + }, + { + "compression_loss": 0.0, + "epoch": 0.0, + "label_loss": 0.5696777105331421, + "learning_rate": 1.0409136909064624e-07, + "loss": 0.432, + "step": 24, + "teacher_loss": 0.41666150093078613 + }, + { + "compression_loss": 0.0, + "epoch": 0.0, + "label_loss": 0.31212449073791504, + "learning_rate": 1.0842850946942316e-07, + "loss": 0.6133, + "step": 25, + "teacher_loss": 0.646796703338623 + }, + { + "compression_loss": 0.0, + "epoch": 0.0, + "label_loss": 0.21366344392299652, + "learning_rate": 1.127656498482001e-07, + "loss": 0.4887, + "step": 26, + "teacher_loss": 0.5192395448684692 + }, + { + "compression_loss": 0.0, + "epoch": 0.0, + "label_loss": 0.5291810035705566, + "learning_rate": 1.1710279022697702e-07, + "loss": 0.4335, + "step": 27, + "teacher_loss": 0.4228705167770386 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.5960581302642822, + "learning_rate": 1.2143993060575394e-07, + "loss": 0.3965, + "step": 28, + "teacher_loss": 0.37433257699012756 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.2765660583972931, + "learning_rate": 1.2577707098453087e-07, + "loss": 0.3472, + "step": 29, + "teacher_loss": 0.35501280426979065 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.3112162947654724, + "learning_rate": 1.3011421136330778e-07, + "loss": 0.2953, + "step": 30, + "teacher_loss": 0.2934904396533966 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.5781139135360718, + "learning_rate": 1.3445135174208474e-07, + "loss": 0.6268, + "step": 31, + "teacher_loss": 0.6322437524795532 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.551255464553833, + "learning_rate": 1.3878849212086165e-07, + "loss": 0.3947, + "step": 32, + "teacher_loss": 0.3773103356361389 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.6234651803970337, + "learning_rate": 1.4312563249963858e-07, + "loss": 0.4112, + "step": 33, + "teacher_loss": 0.38762766122817993 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.9360308647155762, + "learning_rate": 1.4746277287841549e-07, + "loss": 0.4008, + "step": 34, + "teacher_loss": 0.34132856130599976 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.47657620906829834, + "learning_rate": 1.5179991325719245e-07, + "loss": 0.5855, + "step": 35, + "teacher_loss": 0.5975528359413147 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.40065234899520874, + "learning_rate": 1.5613705363596935e-07, + "loss": 0.3151, + "step": 36, + "teacher_loss": 0.30564457178115845 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.2311306595802307, + "learning_rate": 1.604741940147463e-07, + "loss": 0.2869, + "step": 37, + "teacher_loss": 0.2930516302585602 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.3395083546638489, + "learning_rate": 1.648113343935232e-07, + "loss": 0.3718, + "step": 38, + "teacher_loss": 0.37534695863723755 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.167999267578125, + "learning_rate": 1.6914847477230013e-07, + "loss": 0.2363, + "step": 39, + "teacher_loss": 0.24388030171394348 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.6877390742301941, + "learning_rate": 1.7348561515107706e-07, + "loss": 0.3837, + "step": 40, + "teacher_loss": 0.34989410638809204 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.33083808422088623, + "learning_rate": 1.7782275552985397e-07, + "loss": 0.3266, + "step": 41, + "teacher_loss": 0.3260805606842041 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.32144296169281006, + "learning_rate": 1.821598959086309e-07, + "loss": 0.704, + "step": 42, + "teacher_loss": 0.7464689016342163 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.6025416851043701, + "learning_rate": 1.8649703628740784e-07, + "loss": 0.4745, + "step": 43, + "teacher_loss": 0.460231751203537 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.5173925161361694, + "learning_rate": 1.9083417666618477e-07, + "loss": 0.4872, + "step": 44, + "teacher_loss": 0.4838038682937622 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.3744485378265381, + "learning_rate": 1.9517131704496168e-07, + "loss": 0.3537, + "step": 45, + "teacher_loss": 0.35138148069381714 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.21013106405735016, + "learning_rate": 1.9950845742373861e-07, + "loss": 0.2938, + "step": 46, + "teacher_loss": 0.3030865490436554 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.540357768535614, + "learning_rate": 2.0384559780251555e-07, + "loss": 0.3767, + "step": 47, + "teacher_loss": 0.3585663437843323 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.4985116422176361, + "learning_rate": 2.0818273818129248e-07, + "loss": 0.3204, + "step": 48, + "teacher_loss": 0.30065175890922546 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.4947139024734497, + "learning_rate": 2.125198785600694e-07, + "loss": 0.6806, + "step": 49, + "teacher_loss": 0.7013063430786133 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.3116215467453003, + "learning_rate": 2.1685701893884632e-07, + "loss": 0.3689, + "step": 50, + "teacher_loss": 0.37528449296951294 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.40373694896698, + "learning_rate": 2.2119415931762326e-07, + "loss": 0.5921, + "step": 51, + "teacher_loss": 0.6130533814430237 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.5308322906494141, + "learning_rate": 2.255312996964002e-07, + "loss": 0.4236, + "step": 52, + "teacher_loss": 0.41172850131988525 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.42343878746032715, + "learning_rate": 2.298684400751771e-07, + "loss": 0.3904, + "step": 53, + "teacher_loss": 0.3866812586784363 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.36457860469818115, + "learning_rate": 2.3420558045395403e-07, + "loss": 0.342, + "step": 54, + "teacher_loss": 0.33951157331466675 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.33712244033813477, + "learning_rate": 2.38542720832731e-07, + "loss": 0.259, + "step": 55, + "teacher_loss": 0.25035154819488525 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.42537349462509155, + "learning_rate": 2.4287986121150787e-07, + "loss": 0.3422, + "step": 56, + "teacher_loss": 0.3330062925815582 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.7673524618148804, + "learning_rate": 2.472170015902848e-07, + "loss": 0.3707, + "step": 57, + "teacher_loss": 0.326668918132782 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.43040502071380615, + "learning_rate": 2.5155414196906174e-07, + "loss": 0.542, + "step": 58, + "teacher_loss": 0.5543990731239319 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.41028231382369995, + "learning_rate": 2.558912823478386e-07, + "loss": 0.3497, + "step": 59, + "teacher_loss": 0.3429994285106659 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 1.0286916494369507, + "learning_rate": 2.6022842272661556e-07, + "loss": 0.4371, + "step": 60, + "teacher_loss": 0.3713400959968567 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.5336390137672424, + "learning_rate": 2.6456556310539254e-07, + "loss": 0.343, + "step": 61, + "teacher_loss": 0.32187139987945557 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.19562755525112152, + "learning_rate": 2.689027034841695e-07, + "loss": 0.2846, + "step": 62, + "teacher_loss": 0.2945200204849243 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.5079911351203918, + "learning_rate": 2.7323984386294636e-07, + "loss": 0.3185, + "step": 63, + "teacher_loss": 0.29742881655693054 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.3633435368537903, + "learning_rate": 2.775769842417233e-07, + "loss": 0.3915, + "step": 64, + "teacher_loss": 0.3946162462234497 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.4795653522014618, + "learning_rate": 2.819141246205002e-07, + "loss": 0.3785, + "step": 65, + "teacher_loss": 0.3672950863838196 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.5867092609405518, + "learning_rate": 2.8625126499927716e-07, + "loss": 0.526, + "step": 66, + "teacher_loss": 0.5192420482635498 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.3838410973548889, + "learning_rate": 2.9058840537805404e-07, + "loss": 0.3074, + "step": 67, + "teacher_loss": 0.29894983768463135 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.2822248339653015, + "learning_rate": 2.9492554575683097e-07, + "loss": 0.3579, + "step": 68, + "teacher_loss": 0.366265207529068 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.4573609232902527, + "learning_rate": 2.9926268613560796e-07, + "loss": 0.3274, + "step": 69, + "teacher_loss": 0.31290900707244873 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.17300641536712646, + "learning_rate": 3.035998265143849e-07, + "loss": 0.4856, + "step": 70, + "teacher_loss": 0.5203573703765869 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.36407849192619324, + "learning_rate": 3.079369668931618e-07, + "loss": 0.3696, + "step": 71, + "teacher_loss": 0.3702436089515686 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.4354317784309387, + "learning_rate": 3.122741072719387e-07, + "loss": 0.4809, + "step": 72, + "teacher_loss": 0.4859127402305603 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.3354983329772949, + "learning_rate": 3.1661124765071564e-07, + "loss": 0.3284, + "step": 73, + "teacher_loss": 0.32761678099632263 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.4563247561454773, + "learning_rate": 3.209483880294926e-07, + "loss": 0.4131, + "step": 74, + "teacher_loss": 0.4083379805088043 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.34043627977371216, + "learning_rate": 3.2528552840826946e-07, + "loss": 0.2446, + "step": 75, + "teacher_loss": 0.23390792310237885 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.5458319187164307, + "learning_rate": 3.296226687870464e-07, + "loss": 0.4366, + "step": 76, + "teacher_loss": 0.42451536655426025 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.6412752866744995, + "learning_rate": 3.339598091658234e-07, + "loss": 0.4724, + "step": 77, + "teacher_loss": 0.4536617398262024 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.5172377228736877, + "learning_rate": 3.3829694954460026e-07, + "loss": 0.4266, + "step": 78, + "teacher_loss": 0.4165247082710266 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.39089351892471313, + "learning_rate": 3.426340899233772e-07, + "loss": 0.3328, + "step": 79, + "teacher_loss": 0.3263006806373596 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.43537408113479614, + "learning_rate": 3.4697123030215413e-07, + "loss": 0.3144, + "step": 80, + "teacher_loss": 0.3009788990020752 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 0.6011801958084106, + "learning_rate": 3.5130837068093106e-07, + "loss": 0.4713, + "step": 81, + "teacher_loss": 0.45690327882766724 + }, + { + "compression_loss": 0.0, + "epoch": 0.01, + "label_loss": 1.076367735862732, + "learning_rate": 3.5564551105970794e-07, + "loss": 0.5857, + "step": 82, + "teacher_loss": 0.5311605930328369 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.37369078397750854, + "learning_rate": 3.599826514384849e-07, + "loss": 0.35, + "step": 83, + "teacher_loss": 0.34740591049194336 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.3828126788139343, + "learning_rate": 3.643197918172618e-07, + "loss": 0.5781, + "step": 84, + "teacher_loss": 0.5998474359512329 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.5417852997779846, + "learning_rate": 3.686569321960388e-07, + "loss": 0.2808, + "step": 85, + "teacher_loss": 0.2518404424190521 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.3188953101634979, + "learning_rate": 3.729940725748157e-07, + "loss": 0.4551, + "step": 86, + "teacher_loss": 0.47025638818740845 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.27739542722702026, + "learning_rate": 3.773312129535926e-07, + "loss": 0.4346, + "step": 87, + "teacher_loss": 0.45206791162490845 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.8092740774154663, + "learning_rate": 3.8166835333236954e-07, + "loss": 0.5619, + "step": 88, + "teacher_loss": 0.5344049334526062 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.5842029452323914, + "learning_rate": 3.860054937111465e-07, + "loss": 0.4171, + "step": 89, + "teacher_loss": 0.39858120679855347 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.37819138169288635, + "learning_rate": 3.9034263408992336e-07, + "loss": 0.2797, + "step": 90, + "teacher_loss": 0.2687514126300812 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.6116352081298828, + "learning_rate": 3.946797744687003e-07, + "loss": 0.4149, + "step": 91, + "teacher_loss": 0.393058180809021 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.24421319365501404, + "learning_rate": 3.9901691484747723e-07, + "loss": 0.2807, + "step": 92, + "teacher_loss": 0.28480595350265503 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.19853071868419647, + "learning_rate": 4.033540552262542e-07, + "loss": 0.225, + "step": 93, + "teacher_loss": 0.2279570996761322 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.8776455521583557, + "learning_rate": 4.076911956050311e-07, + "loss": 0.3779, + "step": 94, + "teacher_loss": 0.32237839698791504 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.3355174958705902, + "learning_rate": 4.1202833598380803e-07, + "loss": 0.3764, + "step": 95, + "teacher_loss": 0.38089990615844727 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.7735798358917236, + "learning_rate": 4.1636547636258496e-07, + "loss": 0.44, + "step": 96, + "teacher_loss": 0.40291130542755127 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.33850619196891785, + "learning_rate": 4.2070261674136184e-07, + "loss": 0.2829, + "step": 97, + "teacher_loss": 0.27667152881622314 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.31478065252304077, + "learning_rate": 4.250397571201388e-07, + "loss": 0.2783, + "step": 98, + "teacher_loss": 0.27422410249710083 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.3673118054866791, + "learning_rate": 4.293768974989157e-07, + "loss": 0.2723, + "step": 99, + "teacher_loss": 0.2617168724536896 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.37465184926986694, + "learning_rate": 4.3371403787769265e-07, + "loss": 0.2707, + "step": 100, + "teacher_loss": 0.25916990637779236 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.3754885196685791, + "learning_rate": 4.380511782564696e-07, + "loss": 0.2643, + "step": 101, + "teacher_loss": 0.2519530951976776 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.4311053454875946, + "learning_rate": 4.423883186352465e-07, + "loss": 0.3775, + "step": 102, + "teacher_loss": 0.37158575654029846 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.35080692172050476, + "learning_rate": 4.4672545901402345e-07, + "loss": 0.3724, + "step": 103, + "teacher_loss": 0.3748095631599426 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.24182593822479248, + "learning_rate": 4.510625993928004e-07, + "loss": 0.2525, + "step": 104, + "teacher_loss": 0.25370627641677856 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.34194818139076233, + "learning_rate": 4.5539973977157726e-07, + "loss": 0.23, + "step": 105, + "teacher_loss": 0.21761192381381989 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.24426543712615967, + "learning_rate": 4.597368801503542e-07, + "loss": 0.4005, + "step": 106, + "teacher_loss": 0.4178938865661621 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.27553167939186096, + "learning_rate": 4.6407402052913113e-07, + "loss": 0.2505, + "step": 107, + "teacher_loss": 0.24769991636276245 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 1.2734107971191406, + "learning_rate": 4.6841116090790806e-07, + "loss": 0.4416, + "step": 108, + "teacher_loss": 0.34919288754463196 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.37502509355545044, + "learning_rate": 4.72748301286685e-07, + "loss": 0.3259, + "step": 109, + "teacher_loss": 0.32042086124420166 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.6901859045028687, + "learning_rate": 4.77085441665462e-07, + "loss": 0.3745, + "step": 110, + "teacher_loss": 0.33943992853164673 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.6412104964256287, + "learning_rate": 4.814225820442389e-07, + "loss": 0.4131, + "step": 111, + "teacher_loss": 0.38770169019699097 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.2801129221916199, + "learning_rate": 4.857597224230157e-07, + "loss": 0.3256, + "step": 112, + "teacher_loss": 0.33063459396362305 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.1641349494457245, + "learning_rate": 4.900968628017927e-07, + "loss": 0.2595, + "step": 113, + "teacher_loss": 0.27009546756744385 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.25451192259788513, + "learning_rate": 4.944340031805696e-07, + "loss": 0.2682, + "step": 114, + "teacher_loss": 0.26969805359840393 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.8173410892486572, + "learning_rate": 4.987711435593465e-07, + "loss": 0.4288, + "step": 115, + "teacher_loss": 0.3856676518917084 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.30786746740341187, + "learning_rate": 5.031082839381235e-07, + "loss": 0.1893, + "step": 116, + "teacher_loss": 0.17607331275939941 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.23670032620429993, + "learning_rate": 5.074454243169004e-07, + "loss": 0.364, + "step": 117, + "teacher_loss": 0.37815040349960327 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.5879061222076416, + "learning_rate": 5.117825646956772e-07, + "loss": 0.687, + "step": 118, + "teacher_loss": 0.6979761719703674 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.3606983721256256, + "learning_rate": 5.161197050744542e-07, + "loss": 0.3899, + "step": 119, + "teacher_loss": 0.3930942416191101 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.27582406997680664, + "learning_rate": 5.204568454532311e-07, + "loss": 0.2923, + "step": 120, + "teacher_loss": 0.2941736578941345 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.2806050181388855, + "learning_rate": 5.247939858320082e-07, + "loss": 0.2927, + "step": 121, + "teacher_loss": 0.2940514087677002 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.40702688694000244, + "learning_rate": 5.291311262107851e-07, + "loss": 0.3119, + "step": 122, + "teacher_loss": 0.30137813091278076 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.3387115001678467, + "learning_rate": 5.33468266589562e-07, + "loss": 0.27, + "step": 123, + "teacher_loss": 0.262349396944046 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.3801390826702118, + "learning_rate": 5.37805406968339e-07, + "loss": 0.4172, + "step": 124, + "teacher_loss": 0.4213544428348541 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.6031049489974976, + "learning_rate": 5.421425473471158e-07, + "loss": 0.366, + "step": 125, + "teacher_loss": 0.33967217803001404 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.26995834708213806, + "learning_rate": 5.464796877258927e-07, + "loss": 0.3053, + "step": 126, + "teacher_loss": 0.3091879189014435 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.2996470332145691, + "learning_rate": 5.508168281046697e-07, + "loss": 0.3781, + "step": 127, + "teacher_loss": 0.3867761194705963 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.6594407558441162, + "learning_rate": 5.551539684834466e-07, + "loss": 0.2807, + "step": 128, + "teacher_loss": 0.23862676322460175 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.3946491479873657, + "learning_rate": 5.594911088622236e-07, + "loss": 0.3163, + "step": 129, + "teacher_loss": 0.30759650468826294 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.23987199366092682, + "learning_rate": 5.638282492410004e-07, + "loss": 0.1924, + "step": 130, + "teacher_loss": 0.18717840313911438 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.32025086879730225, + "learning_rate": 5.681653896197773e-07, + "loss": 0.3402, + "step": 131, + "teacher_loss": 0.34237128496170044 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.7483360767364502, + "learning_rate": 5.725025299985543e-07, + "loss": 0.6402, + "step": 132, + "teacher_loss": 0.6281335353851318 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.6435651779174805, + "learning_rate": 5.768396703773312e-07, + "loss": 0.6577, + "step": 133, + "teacher_loss": 0.6593012809753418 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.2137606143951416, + "learning_rate": 5.811768107561081e-07, + "loss": 0.4271, + "step": 134, + "teacher_loss": 0.45077282190322876 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.38604655861854553, + "learning_rate": 5.855139511348851e-07, + "loss": 0.5313, + "step": 135, + "teacher_loss": 0.5474059581756592 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.5229781270027161, + "learning_rate": 5.898510915136619e-07, + "loss": 0.4228, + "step": 136, + "teacher_loss": 0.4116794168949127 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.6987260580062866, + "learning_rate": 5.941882318924389e-07, + "loss": 0.4832, + "step": 137, + "teacher_loss": 0.4592827558517456 + }, + { + "compression_loss": 0.0, + "epoch": 0.02, + "label_loss": 0.47588491439819336, + "learning_rate": 5.985253722712159e-07, + "loss": 0.4062, + "step": 138, + "teacher_loss": 0.39845460653305054 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.2496635913848877, + "learning_rate": 6.028625126499928e-07, + "loss": 0.2436, + "step": 139, + "teacher_loss": 0.24296367168426514 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.44500523805618286, + "learning_rate": 6.071996530287698e-07, + "loss": 0.3509, + "step": 140, + "teacher_loss": 0.34049880504608154 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.7203101515769958, + "learning_rate": 6.115367934075467e-07, + "loss": 0.5543, + "step": 141, + "teacher_loss": 0.5358462333679199 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.7729004621505737, + "learning_rate": 6.158739337863235e-07, + "loss": 0.4685, + "step": 142, + "teacher_loss": 0.43462610244750977 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.5708900094032288, + "learning_rate": 6.202110741651005e-07, + "loss": 0.2883, + "step": 143, + "teacher_loss": 0.25688982009887695 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.46314001083374023, + "learning_rate": 6.245482145438774e-07, + "loss": 0.3364, + "step": 144, + "teacher_loss": 0.32227838039398193 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.5948927998542786, + "learning_rate": 6.288853549226543e-07, + "loss": 0.3916, + "step": 145, + "teacher_loss": 0.36899033188819885 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.31737908720970154, + "learning_rate": 6.332224953014313e-07, + "loss": 0.2703, + "step": 146, + "teacher_loss": 0.2650870382785797 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.4784914255142212, + "learning_rate": 6.375596356802082e-07, + "loss": 0.4235, + "step": 147, + "teacher_loss": 0.41739553213119507 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.2840213179588318, + "learning_rate": 6.418967760589852e-07, + "loss": 0.3489, + "step": 148, + "teacher_loss": 0.3561570644378662 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.6581293344497681, + "learning_rate": 6.46233916437762e-07, + "loss": 0.4675, + "step": 149, + "teacher_loss": 0.4463242292404175 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.31044745445251465, + "learning_rate": 6.505710568165389e-07, + "loss": 0.3008, + "step": 150, + "teacher_loss": 0.29976487159729004 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.39795494079589844, + "learning_rate": 6.549081971953159e-07, + "loss": 0.2962, + "step": 151, + "teacher_loss": 0.2849386930465698 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.18419547379016876, + "learning_rate": 6.592453375740928e-07, + "loss": 0.1948, + "step": 152, + "teacher_loss": 0.19600102305412292 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.4073745012283325, + "learning_rate": 6.635824779528698e-07, + "loss": 0.332, + "step": 153, + "teacher_loss": 0.3236081600189209 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.23148149251937866, + "learning_rate": 6.679196183316468e-07, + "loss": 0.2851, + "step": 154, + "teacher_loss": 0.29110246896743774 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.4480724334716797, + "learning_rate": 6.722567587104236e-07, + "loss": 0.3282, + "step": 155, + "teacher_loss": 0.31482595205307007 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.3575660288333893, + "learning_rate": 6.765938990892005e-07, + "loss": 0.2659, + "step": 156, + "teacher_loss": 0.25568193197250366 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.45173072814941406, + "learning_rate": 6.809310394679775e-07, + "loss": 0.2062, + "step": 157, + "teacher_loss": 0.17894917726516724 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.6072745323181152, + "learning_rate": 6.852681798467544e-07, + "loss": 0.4497, + "step": 158, + "teacher_loss": 0.43217384815216064 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.35858872532844543, + "learning_rate": 6.896053202255314e-07, + "loss": 0.3444, + "step": 159, + "teacher_loss": 0.34277233481407166 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.6806064248085022, + "learning_rate": 6.939424606043083e-07, + "loss": 0.3469, + "step": 160, + "teacher_loss": 0.30976685881614685 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.46310800313949585, + "learning_rate": 6.982796009830851e-07, + "loss": 0.4423, + "step": 161, + "teacher_loss": 0.43999171257019043 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.5555820465087891, + "learning_rate": 7.026167413618621e-07, + "loss": 0.2843, + "step": 162, + "teacher_loss": 0.25418922305107117 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.3326151967048645, + "learning_rate": 7.06953881740639e-07, + "loss": 0.3325, + "step": 163, + "teacher_loss": 0.3324737250804901 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.1965804100036621, + "learning_rate": 7.112910221194159e-07, + "loss": 0.2458, + "step": 164, + "teacher_loss": 0.2512645423412323 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.289686381816864, + "learning_rate": 7.156281624981929e-07, + "loss": 0.2338, + "step": 165, + "teacher_loss": 0.22755871713161469 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.6694118976593018, + "learning_rate": 7.199653028769698e-07, + "loss": 0.3307, + "step": 166, + "teacher_loss": 0.29301539063453674 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.5715506672859192, + "learning_rate": 7.243024432557467e-07, + "loss": 0.449, + "step": 167, + "teacher_loss": 0.43539315462112427 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.49966850876808167, + "learning_rate": 7.286395836345236e-07, + "loss": 0.3159, + "step": 168, + "teacher_loss": 0.29553017020225525 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.4972558617591858, + "learning_rate": 7.329767240133006e-07, + "loss": 0.3065, + "step": 169, + "teacher_loss": 0.28530749678611755 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.5740405321121216, + "learning_rate": 7.373138643920776e-07, + "loss": 0.2995, + "step": 170, + "teacher_loss": 0.26899081468582153 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.5747886300086975, + "learning_rate": 7.416510047708545e-07, + "loss": 0.2857, + "step": 171, + "teacher_loss": 0.2536314129829407 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.6649643182754517, + "learning_rate": 7.459881451496314e-07, + "loss": 0.3111, + "step": 172, + "teacher_loss": 0.27174609899520874 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.3147515058517456, + "learning_rate": 7.503252855284083e-07, + "loss": 0.3875, + "step": 173, + "teacher_loss": 0.395530641078949 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.1841243952512741, + "learning_rate": 7.546624259071852e-07, + "loss": 0.3245, + "step": 174, + "teacher_loss": 0.3400971293449402 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.6122177839279175, + "learning_rate": 7.589995662859621e-07, + "loss": 0.4994, + "step": 175, + "teacher_loss": 0.4869192838668823 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.2748439610004425, + "learning_rate": 7.633367066647391e-07, + "loss": 0.2897, + "step": 176, + "teacher_loss": 0.2912960648536682 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.804057240486145, + "learning_rate": 7.67673847043516e-07, + "loss": 0.4528, + "step": 177, + "teacher_loss": 0.4138133227825165 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.6062012910842896, + "learning_rate": 7.72010987422293e-07, + "loss": 0.4437, + "step": 178, + "teacher_loss": 0.4256832003593445 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.3464525640010834, + "learning_rate": 7.763481278010698e-07, + "loss": 0.3721, + "step": 179, + "teacher_loss": 0.3749992251396179 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.9590786695480347, + "learning_rate": 7.806852681798467e-07, + "loss": 0.3323, + "step": 180, + "teacher_loss": 0.2627091407775879 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.7242481708526611, + "learning_rate": 7.850224085586237e-07, + "loss": 0.355, + "step": 181, + "teacher_loss": 0.3139296770095825 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.7277974486351013, + "learning_rate": 7.893595489374006e-07, + "loss": 0.3634, + "step": 182, + "teacher_loss": 0.32291775941848755 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.29894939064979553, + "learning_rate": 7.936966893161775e-07, + "loss": 0.2436, + "step": 183, + "teacher_loss": 0.2374768853187561 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.31897616386413574, + "learning_rate": 7.980338296949545e-07, + "loss": 0.2987, + "step": 184, + "teacher_loss": 0.2964322566986084 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.4672470986843109, + "learning_rate": 8.023709700737314e-07, + "loss": 0.3647, + "step": 185, + "teacher_loss": 0.3532944321632385 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.6552913188934326, + "learning_rate": 8.067081104525084e-07, + "loss": 0.4423, + "step": 186, + "teacher_loss": 0.4186667203903198 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.4703465700149536, + "learning_rate": 8.110452508312853e-07, + "loss": 0.2905, + "step": 187, + "teacher_loss": 0.2704945504665375 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.36512064933776855, + "learning_rate": 8.153823912100622e-07, + "loss": 0.2582, + "step": 188, + "teacher_loss": 0.24634620547294617 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.5008628368377686, + "learning_rate": 8.197195315888392e-07, + "loss": 0.287, + "step": 189, + "teacher_loss": 0.2632533311843872 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.2761531472206116, + "learning_rate": 8.240566719676161e-07, + "loss": 0.3818, + "step": 190, + "teacher_loss": 0.39357393980026245 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.4236161410808563, + "learning_rate": 8.283938123463929e-07, + "loss": 0.3699, + "step": 191, + "teacher_loss": 0.36391544342041016 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.38792508840560913, + "learning_rate": 8.327309527251699e-07, + "loss": 0.3698, + "step": 192, + "teacher_loss": 0.3677966594696045 + }, + { + "compression_loss": 0.0, + "epoch": 0.03, + "label_loss": 0.7151434421539307, + "learning_rate": 8.370680931039468e-07, + "loss": 0.3354, + "step": 193, + "teacher_loss": 0.2932465076446533 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.29336994886398315, + "learning_rate": 8.414052334827237e-07, + "loss": 0.3233, + "step": 194, + "teacher_loss": 0.3265939950942993 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.39612430334091187, + "learning_rate": 8.457423738615007e-07, + "loss": 0.3076, + "step": 195, + "teacher_loss": 0.29779279232025146 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.3977528512477875, + "learning_rate": 8.500795142402776e-07, + "loss": 0.253, + "step": 196, + "teacher_loss": 0.23690246045589447 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.3796065151691437, + "learning_rate": 8.544166546190545e-07, + "loss": 0.3437, + "step": 197, + "teacher_loss": 0.33966565132141113 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.717301607131958, + "learning_rate": 8.587537949978314e-07, + "loss": 0.5056, + "step": 198, + "teacher_loss": 0.48209643363952637 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.6161582469940186, + "learning_rate": 8.630909353766083e-07, + "loss": 0.4483, + "step": 199, + "teacher_loss": 0.4296002984046936 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.6727553606033325, + "learning_rate": 8.674280757553853e-07, + "loss": 0.3356, + "step": 200, + "teacher_loss": 0.2981756329536438 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.5859873294830322, + "learning_rate": 8.717652161341623e-07, + "loss": 0.3323, + "step": 201, + "teacher_loss": 0.30413758754730225 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.6775317788124084, + "learning_rate": 8.761023565129392e-07, + "loss": 0.3207, + "step": 202, + "teacher_loss": 0.28099697828292847 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.687187910079956, + "learning_rate": 8.804394968917161e-07, + "loss": 0.3559, + "step": 203, + "teacher_loss": 0.31906983256340027 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.5126361846923828, + "learning_rate": 8.84776637270493e-07, + "loss": 0.2829, + "step": 204, + "teacher_loss": 0.2573986053466797 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.23460006713867188, + "learning_rate": 8.8911377764927e-07, + "loss": 0.2505, + "step": 205, + "teacher_loss": 0.2523079514503479 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.4875229597091675, + "learning_rate": 8.934509180280469e-07, + "loss": 0.5581, + "step": 206, + "teacher_loss": 0.5659719109535217 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.3942033648490906, + "learning_rate": 8.977880584068238e-07, + "loss": 0.3104, + "step": 207, + "teacher_loss": 0.30104804039001465 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.4788144826889038, + "learning_rate": 9.021251987856008e-07, + "loss": 0.2747, + "step": 208, + "teacher_loss": 0.2520306408405304 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.5390474200248718, + "learning_rate": 9.064623391643776e-07, + "loss": 0.2919, + "step": 209, + "teacher_loss": 0.2644907236099243 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.47657227516174316, + "learning_rate": 9.107994795431545e-07, + "loss": 0.284, + "step": 210, + "teacher_loss": 0.26265081763267517 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.17647041380405426, + "learning_rate": 9.151366199219315e-07, + "loss": 0.3022, + "step": 211, + "teacher_loss": 0.3162067234516144 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.4461628794670105, + "learning_rate": 9.194737603007084e-07, + "loss": 0.5094, + "step": 212, + "teacher_loss": 0.516472339630127 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.21757963299751282, + "learning_rate": 9.238109006794853e-07, + "loss": 0.3133, + "step": 213, + "teacher_loss": 0.32392221689224243 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.1596691757440567, + "learning_rate": 9.281480410582623e-07, + "loss": 0.2204, + "step": 214, + "teacher_loss": 0.22716379165649414 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.3218694031238556, + "learning_rate": 9.324851814370391e-07, + "loss": 0.2393, + "step": 215, + "teacher_loss": 0.23013153672218323 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.3074747920036316, + "learning_rate": 9.368223218158161e-07, + "loss": 0.2936, + "step": 216, + "teacher_loss": 0.292053759098053 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.2528434991836548, + "learning_rate": 9.411594621945931e-07, + "loss": 0.3194, + "step": 217, + "teacher_loss": 0.32680851221084595 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.6210227608680725, + "learning_rate": 9.4549660257337e-07, + "loss": 0.2997, + "step": 218, + "teacher_loss": 0.2639586925506592 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.5951622724533081, + "learning_rate": 9.498337429521469e-07, + "loss": 0.3087, + "step": 219, + "teacher_loss": 0.2769015431404114 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.3769147992134094, + "learning_rate": 9.54170883330924e-07, + "loss": 0.4781, + "step": 220, + "teacher_loss": 0.4893210828304291 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.6473550200462341, + "learning_rate": 9.585080237097007e-07, + "loss": 0.3045, + "step": 221, + "teacher_loss": 0.2664129436016083 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.32416659593582153, + "learning_rate": 9.628451640884777e-07, + "loss": 0.3289, + "step": 222, + "teacher_loss": 0.3293718695640564 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.9988908767700195, + "learning_rate": 9.671823044672545e-07, + "loss": 0.3651, + "step": 223, + "teacher_loss": 0.2947273850440979 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.39788341522216797, + "learning_rate": 9.715194448460315e-07, + "loss": 0.2987, + "step": 224, + "teacher_loss": 0.2877279818058014 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.9215387105941772, + "learning_rate": 9.758565852248085e-07, + "loss": 0.4385, + "step": 225, + "teacher_loss": 0.38485661149024963 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.4114275574684143, + "learning_rate": 9.801937256035855e-07, + "loss": 0.4904, + "step": 226, + "teacher_loss": 0.49916720390319824 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.6249231696128845, + "learning_rate": 9.845308659823625e-07, + "loss": 0.1951, + "step": 227, + "teacher_loss": 0.14731940627098083 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.4257274568080902, + "learning_rate": 9.888680063611392e-07, + "loss": 0.2733, + "step": 228, + "teacher_loss": 0.2563665211200714 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.3597075939178467, + "learning_rate": 9.932051467399162e-07, + "loss": 0.6172, + "step": 229, + "teacher_loss": 0.6458524465560913 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.18291598558425903, + "learning_rate": 9.97542287118693e-07, + "loss": 0.2182, + "step": 230, + "teacher_loss": 0.2221117615699768 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.23957034945487976, + "learning_rate": 1.0018794274974702e-06, + "loss": 0.4058, + "step": 231, + "teacher_loss": 0.42427605390548706 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.34674763679504395, + "learning_rate": 1.006216567876247e-06, + "loss": 0.3248, + "step": 232, + "teacher_loss": 0.3223832845687866 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.392194926738739, + "learning_rate": 1.010553708255024e-06, + "loss": 0.2606, + "step": 233, + "teacher_loss": 0.24595263600349426 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.3266582489013672, + "learning_rate": 1.0148908486338007e-06, + "loss": 0.2618, + "step": 234, + "teacher_loss": 0.2546347677707672 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.23488172888755798, + "learning_rate": 1.0192279890125777e-06, + "loss": 0.2398, + "step": 235, + "teacher_loss": 0.24030104279518127 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.31779786944389343, + "learning_rate": 1.0235651293913545e-06, + "loss": 0.2895, + "step": 236, + "teacher_loss": 0.28630515933036804 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.7872130870819092, + "learning_rate": 1.0279022697701317e-06, + "loss": 0.3855, + "step": 237, + "teacher_loss": 0.3409123420715332 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.7485690116882324, + "learning_rate": 1.0322394101489085e-06, + "loss": 0.3265, + "step": 238, + "teacher_loss": 0.2796207368373871 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.3279935121536255, + "learning_rate": 1.0365765505276854e-06, + "loss": 0.2793, + "step": 239, + "teacher_loss": 0.2738860845565796 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.3581310510635376, + "learning_rate": 1.0409136909064622e-06, + "loss": 0.2538, + "step": 240, + "teacher_loss": 0.24225902557373047 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.43405529856681824, + "learning_rate": 1.0452508312852392e-06, + "loss": 0.319, + "step": 241, + "teacher_loss": 0.30625149607658386 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.47915583848953247, + "learning_rate": 1.0495879716640164e-06, + "loss": 0.3991, + "step": 242, + "teacher_loss": 0.3902336359024048 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.1838829219341278, + "learning_rate": 1.0539251120427932e-06, + "loss": 0.2737, + "step": 243, + "teacher_loss": 0.28363755345344543 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.08305107057094574, + "learning_rate": 1.0582622524215702e-06, + "loss": 0.2508, + "step": 244, + "teacher_loss": 0.26947861909866333 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.273042231798172, + "learning_rate": 1.062599392800347e-06, + "loss": 0.3815, + "step": 245, + "teacher_loss": 0.39350050687789917 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.46982133388519287, + "learning_rate": 1.066936533179124e-06, + "loss": 0.2989, + "step": 246, + "teacher_loss": 0.279954731464386 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.7464994192123413, + "learning_rate": 1.0712736735579007e-06, + "loss": 0.3518, + "step": 247, + "teacher_loss": 0.30791768431663513 + }, + { + "compression_loss": 0.0, + "epoch": 0.04, + "label_loss": 0.1897316426038742, + "learning_rate": 1.075610813936678e-06, + "loss": 0.2672, + "step": 248, + "teacher_loss": 0.27575311064720154 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.35607877373695374, + "learning_rate": 1.0799479543154547e-06, + "loss": 0.2801, + "step": 249, + "teacher_loss": 0.2716768980026245 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.33084583282470703, + "learning_rate": 1.0842850946942317e-06, + "loss": 0.2449, + "step": 250, + "teacher_loss": 0.23533692955970764 + }, + { + "epoch": 0.05, + "eval_exact_match": 79.57426679280984, + "eval_f1": 87.09971198800199, + "step": 250 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.3847602307796478, + "learning_rate": 1.0886222350730084e-06, + "loss": 0.3639, + "step": 251, + "teacher_loss": 0.36158716678619385 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.3344457745552063, + "learning_rate": 1.0929593754517854e-06, + "loss": 0.1738, + "step": 252, + "teacher_loss": 0.15597465634346008 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.19527855515480042, + "learning_rate": 1.0972965158305624e-06, + "loss": 0.2675, + "step": 253, + "teacher_loss": 0.27551722526550293 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.4267202317714691, + "learning_rate": 1.1016336562093394e-06, + "loss": 0.258, + "step": 254, + "teacher_loss": 0.23925891518592834 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 1.2726855278015137, + "learning_rate": 1.1059707965881162e-06, + "loss": 0.5782, + "step": 255, + "teacher_loss": 0.5010079145431519 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.38246625661849976, + "learning_rate": 1.1103079369668932e-06, + "loss": 0.3419, + "step": 256, + "teacher_loss": 0.33744117617607117 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.46521979570388794, + "learning_rate": 1.1146450773456701e-06, + "loss": 0.2633, + "step": 257, + "teacher_loss": 0.2408798187971115 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.3050382733345032, + "learning_rate": 1.1189822177244471e-06, + "loss": 0.3395, + "step": 258, + "teacher_loss": 0.34336796402931213 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.45095938444137573, + "learning_rate": 1.1233193581032241e-06, + "loss": 0.3171, + "step": 259, + "teacher_loss": 0.3022615313529968 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.4104680120944977, + "learning_rate": 1.127656498482001e-06, + "loss": 0.3668, + "step": 260, + "teacher_loss": 0.36197105050086975 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.5469045042991638, + "learning_rate": 1.1319936388607779e-06, + "loss": 0.3028, + "step": 261, + "teacher_loss": 0.2756377160549164 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.3915804922580719, + "learning_rate": 1.1363307792395547e-06, + "loss": 0.2776, + "step": 262, + "teacher_loss": 0.2649552524089813 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.22109833359718323, + "learning_rate": 1.1406679196183316e-06, + "loss": 0.2476, + "step": 263, + "teacher_loss": 0.2505549192428589 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.4426504671573639, + "learning_rate": 1.1450050599971086e-06, + "loss": 0.4039, + "step": 264, + "teacher_loss": 0.3996136784553528 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.5688486099243164, + "learning_rate": 1.1493422003758856e-06, + "loss": 0.3441, + "step": 265, + "teacher_loss": 0.3190777003765106 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.5123586058616638, + "learning_rate": 1.1536793407546624e-06, + "loss": 0.3118, + "step": 266, + "teacher_loss": 0.2895471155643463 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.6764093637466431, + "learning_rate": 1.1580164811334394e-06, + "loss": 0.4376, + "step": 267, + "teacher_loss": 0.41103094816207886 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.578113317489624, + "learning_rate": 1.1623536215122162e-06, + "loss": 0.5465, + "step": 268, + "teacher_loss": 0.5429891347885132 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.9408819079399109, + "learning_rate": 1.1666907618909934e-06, + "loss": 0.3645, + "step": 269, + "teacher_loss": 0.3004879653453827 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.3857704997062683, + "learning_rate": 1.1710279022697701e-06, + "loss": 0.1969, + "step": 270, + "teacher_loss": 0.17585934698581696 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.5578367710113525, + "learning_rate": 1.1753650426485471e-06, + "loss": 0.288, + "step": 271, + "teacher_loss": 0.25804078578948975 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.3610136806964874, + "learning_rate": 1.1797021830273239e-06, + "loss": 0.2635, + "step": 272, + "teacher_loss": 0.25263839960098267 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.4490371644496918, + "learning_rate": 1.1840393234061009e-06, + "loss": 0.2865, + "step": 273, + "teacher_loss": 0.26838675141334534 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.18405231833457947, + "learning_rate": 1.1883764637848779e-06, + "loss": 0.188, + "step": 274, + "teacher_loss": 0.18840843439102173 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.3971933126449585, + "learning_rate": 1.1927136041636549e-06, + "loss": 0.3303, + "step": 275, + "teacher_loss": 0.3228839337825775 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.43243539333343506, + "learning_rate": 1.1970507445424318e-06, + "loss": 0.2265, + "step": 276, + "teacher_loss": 0.20363551378250122 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.18577873706817627, + "learning_rate": 1.2013878849212086e-06, + "loss": 0.1833, + "step": 277, + "teacher_loss": 0.18302816152572632 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.48472264409065247, + "learning_rate": 1.2057250252999856e-06, + "loss": 0.2076, + "step": 278, + "teacher_loss": 0.1768307089805603 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.8165194988250732, + "learning_rate": 1.2100621656787624e-06, + "loss": 0.498, + "step": 279, + "teacher_loss": 0.46261414885520935 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.5283606052398682, + "learning_rate": 1.2143993060575396e-06, + "loss": 0.321, + "step": 280, + "teacher_loss": 0.29796096682548523 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.43578922748565674, + "learning_rate": 1.2187364464363164e-06, + "loss": 0.2935, + "step": 281, + "teacher_loss": 0.27770906686782837 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.21357658505439758, + "learning_rate": 1.2230735868150933e-06, + "loss": 0.225, + "step": 282, + "teacher_loss": 0.22626854479312897 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.38164523243904114, + "learning_rate": 1.2274107271938701e-06, + "loss": 0.3308, + "step": 283, + "teacher_loss": 0.32512742280960083 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.4603731036186218, + "learning_rate": 1.231747867572647e-06, + "loss": 0.3014, + "step": 284, + "teacher_loss": 0.2837747037410736 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.4667670726776123, + "learning_rate": 1.2360850079514239e-06, + "loss": 0.2557, + "step": 285, + "teacher_loss": 0.23228047788143158 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.654017448425293, + "learning_rate": 1.240422148330201e-06, + "loss": 0.7453, + "step": 286, + "teacher_loss": 0.7554295063018799 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.6986708641052246, + "learning_rate": 1.2447592887089778e-06, + "loss": 0.2876, + "step": 287, + "teacher_loss": 0.24195826053619385 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.3751755356788635, + "learning_rate": 1.2490964290877548e-06, + "loss": 0.2488, + "step": 288, + "teacher_loss": 0.23473919928073883 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.4674665629863739, + "learning_rate": 1.2534335694665318e-06, + "loss": 0.3623, + "step": 289, + "teacher_loss": 0.3506540060043335 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.9339957237243652, + "learning_rate": 1.2577707098453086e-06, + "loss": 0.447, + "step": 290, + "teacher_loss": 0.39291030168533325 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.32550048828125, + "learning_rate": 1.2621078502240858e-06, + "loss": 0.2291, + "step": 291, + "teacher_loss": 0.21833573281764984 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.2830648720264435, + "learning_rate": 1.2664449906028626e-06, + "loss": 0.26, + "step": 292, + "teacher_loss": 0.257387638092041 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.5135868191719055, + "learning_rate": 1.2707821309816396e-06, + "loss": 0.311, + "step": 293, + "teacher_loss": 0.28852254152297974 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.6661826968193054, + "learning_rate": 1.2751192713604163e-06, + "loss": 0.3246, + "step": 294, + "teacher_loss": 0.28665751218795776 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.10539337992668152, + "learning_rate": 1.2794564117391933e-06, + "loss": 0.2193, + "step": 295, + "teacher_loss": 0.23196059465408325 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.4741356372833252, + "learning_rate": 1.2837935521179703e-06, + "loss": 0.3949, + "step": 296, + "teacher_loss": 0.386050820350647 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.19788937270641327, + "learning_rate": 1.2881306924967473e-06, + "loss": 0.2118, + "step": 297, + "teacher_loss": 0.21335843205451965 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.2720310688018799, + "learning_rate": 1.292467832875524e-06, + "loss": 0.3443, + "step": 298, + "teacher_loss": 0.35228031873703003 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.5976897478103638, + "learning_rate": 1.296804973254301e-06, + "loss": 0.5105, + "step": 299, + "teacher_loss": 0.5007622241973877 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.29341644048690796, + "learning_rate": 1.3011421136330778e-06, + "loss": 0.2938, + "step": 300, + "teacher_loss": 0.2938333749771118 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.3075222373008728, + "learning_rate": 1.3054792540118548e-06, + "loss": 0.2691, + "step": 301, + "teacher_loss": 0.2647859454154968 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.49865618348121643, + "learning_rate": 1.3098163943906318e-06, + "loss": 0.2604, + "step": 302, + "teacher_loss": 0.23394174873828888 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.48242807388305664, + "learning_rate": 1.3141535347694088e-06, + "loss": 0.3184, + "step": 303, + "teacher_loss": 0.300139844417572 + }, + { + "compression_loss": 0.0, + "epoch": 0.05, + "label_loss": 0.15943261981010437, + "learning_rate": 1.3184906751481856e-06, + "loss": 0.2188, + "step": 304, + "teacher_loss": 0.22538039088249207 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.6063594818115234, + "learning_rate": 1.3228278155269626e-06, + "loss": 0.3055, + "step": 305, + "teacher_loss": 0.2721177339553833 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.36577171087265015, + "learning_rate": 1.3271649559057395e-06, + "loss": 0.2231, + "step": 306, + "teacher_loss": 0.20729850232601166 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.6283776164054871, + "learning_rate": 1.3315020962845165e-06, + "loss": 0.3843, + "step": 307, + "teacher_loss": 0.35716527700424194 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.5405994653701782, + "learning_rate": 1.3358392366632935e-06, + "loss": 0.3138, + "step": 308, + "teacher_loss": 0.2886366844177246 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.5965635776519775, + "learning_rate": 1.3401763770420703e-06, + "loss": 0.2536, + "step": 309, + "teacher_loss": 0.2154558002948761 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.5021859407424927, + "learning_rate": 1.3445135174208473e-06, + "loss": 0.3334, + "step": 310, + "teacher_loss": 0.31462663412094116 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.4530951976776123, + "learning_rate": 1.348850657799624e-06, + "loss": 0.3532, + "step": 311, + "teacher_loss": 0.3420509397983551 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.6128342151641846, + "learning_rate": 1.353187798178401e-06, + "loss": 0.3002, + "step": 312, + "teacher_loss": 0.26549673080444336 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.6090314388275146, + "learning_rate": 1.357524938557178e-06, + "loss": 0.3407, + "step": 313, + "teacher_loss": 0.31087803840637207 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.22318555414676666, + "learning_rate": 1.361862078935955e-06, + "loss": 0.2475, + "step": 314, + "teacher_loss": 0.25018370151519775 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.24272391200065613, + "learning_rate": 1.3661992193147318e-06, + "loss": 0.241, + "step": 315, + "teacher_loss": 0.2408469021320343 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.6072691679000854, + "learning_rate": 1.3705363596935088e-06, + "loss": 0.3729, + "step": 316, + "teacher_loss": 0.3468794524669647 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.179803729057312, + "learning_rate": 1.3748735000722855e-06, + "loss": 0.1928, + "step": 317, + "teacher_loss": 0.19420017302036285 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.8709033727645874, + "learning_rate": 1.3792106404510627e-06, + "loss": 0.3194, + "step": 318, + "teacher_loss": 0.2581326961517334 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.24580241739749908, + "learning_rate": 1.3835477808298395e-06, + "loss": 0.2602, + "step": 319, + "teacher_loss": 0.2618195712566376 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.42237046360969543, + "learning_rate": 1.3878849212086165e-06, + "loss": 0.6006, + "step": 320, + "teacher_loss": 0.6204584836959839 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.7018339037895203, + "learning_rate": 1.3922220615873935e-06, + "loss": 0.321, + "step": 321, + "teacher_loss": 0.2787395119667053 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.2072555273771286, + "learning_rate": 1.3965592019661703e-06, + "loss": 0.2243, + "step": 322, + "teacher_loss": 0.2262054681777954 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.49878421425819397, + "learning_rate": 1.4008963423449473e-06, + "loss": 0.3428, + "step": 323, + "teacher_loss": 0.32551002502441406 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.49750423431396484, + "learning_rate": 1.4052334827237242e-06, + "loss": 0.2733, + "step": 324, + "teacher_loss": 0.2483411729335785 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.47724393010139465, + "learning_rate": 1.4095706231025012e-06, + "loss": 0.3038, + "step": 325, + "teacher_loss": 0.2844970226287842 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.2629703879356384, + "learning_rate": 1.413907763481278e-06, + "loss": 0.2195, + "step": 326, + "teacher_loss": 0.21464210748672485 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.4069712162017822, + "learning_rate": 1.418244903860055e-06, + "loss": 0.2487, + "step": 327, + "teacher_loss": 0.231143057346344 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.3577013611793518, + "learning_rate": 1.4225820442388318e-06, + "loss": 0.2773, + "step": 328, + "teacher_loss": 0.26837706565856934 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.2630511522293091, + "learning_rate": 1.426919184617609e-06, + "loss": 0.3246, + "step": 329, + "teacher_loss": 0.3313871920108795 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.3583536446094513, + "learning_rate": 1.4312563249963857e-06, + "loss": 0.2763, + "step": 330, + "teacher_loss": 0.26717931032180786 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.6040447354316711, + "learning_rate": 1.4355934653751627e-06, + "loss": 0.3525, + "step": 331, + "teacher_loss": 0.324535071849823 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.3912610411643982, + "learning_rate": 1.4399306057539395e-06, + "loss": 0.3091, + "step": 332, + "teacher_loss": 0.2999173402786255 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.217939555644989, + "learning_rate": 1.4442677461327165e-06, + "loss": 0.2451, + "step": 333, + "teacher_loss": 0.24812567234039307 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.5532275438308716, + "learning_rate": 1.4486048865114935e-06, + "loss": 0.4276, + "step": 334, + "teacher_loss": 0.41367918252944946 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.5887179374694824, + "learning_rate": 1.4529420268902705e-06, + "loss": 0.3989, + "step": 335, + "teacher_loss": 0.3777720034122467 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.5182253122329712, + "learning_rate": 1.4572791672690472e-06, + "loss": 0.2488, + "step": 336, + "teacher_loss": 0.21885734796524048 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.5497493743896484, + "learning_rate": 1.4616163076478242e-06, + "loss": 0.2501, + "step": 337, + "teacher_loss": 0.2168208807706833 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.6350235939025879, + "learning_rate": 1.4659534480266012e-06, + "loss": 0.3647, + "step": 338, + "teacher_loss": 0.3346971273422241 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.4286232888698578, + "learning_rate": 1.470290588405378e-06, + "loss": 0.2719, + "step": 339, + "teacher_loss": 0.25453823804855347 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.597405195236206, + "learning_rate": 1.4746277287841552e-06, + "loss": 0.2909, + "step": 340, + "teacher_loss": 0.2568548321723938 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.5068219304084778, + "learning_rate": 1.478964869162932e-06, + "loss": 0.2928, + "step": 341, + "teacher_loss": 0.2690381705760956 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.3326978087425232, + "learning_rate": 1.483302009541709e-06, + "loss": 0.2937, + "step": 342, + "teacher_loss": 0.2893740236759186 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.3101786971092224, + "learning_rate": 1.4876391499204857e-06, + "loss": 0.3049, + "step": 343, + "teacher_loss": 0.3043462038040161 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.653206467628479, + "learning_rate": 1.4919762902992627e-06, + "loss": 0.311, + "step": 344, + "teacher_loss": 0.27302148938179016 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.37953609228134155, + "learning_rate": 1.4963134306780397e-06, + "loss": 0.2532, + "step": 345, + "teacher_loss": 0.23919625580310822 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.2678660452365875, + "learning_rate": 1.5006505710568167e-06, + "loss": 0.2476, + "step": 346, + "teacher_loss": 0.245295912027359 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.4508707821369171, + "learning_rate": 1.5049877114355935e-06, + "loss": 0.2403, + "step": 347, + "teacher_loss": 0.2168768048286438 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.47383183240890503, + "learning_rate": 1.5093248518143704e-06, + "loss": 0.2995, + "step": 348, + "teacher_loss": 0.28011664748191833 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.3042389452457428, + "learning_rate": 1.5136619921931472e-06, + "loss": 0.3169, + "step": 349, + "teacher_loss": 0.31831949949264526 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.12408284842967987, + "learning_rate": 1.5179991325719242e-06, + "loss": 0.5567, + "step": 350, + "teacher_loss": 0.6047139167785645 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.5269701480865479, + "learning_rate": 1.5223362729507012e-06, + "loss": 0.2722, + "step": 351, + "teacher_loss": 0.2439274787902832 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.5357444882392883, + "learning_rate": 1.5266734133294782e-06, + "loss": 0.327, + "step": 352, + "teacher_loss": 0.30385881662368774 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.34682193398475647, + "learning_rate": 1.531010553708255e-06, + "loss": 0.3136, + "step": 353, + "teacher_loss": 0.30985987186431885 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.3019476532936096, + "learning_rate": 1.535347694087032e-06, + "loss": 0.2591, + "step": 354, + "teacher_loss": 0.25438013672828674 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.1920035183429718, + "learning_rate": 1.539684834465809e-06, + "loss": 0.2727, + "step": 355, + "teacher_loss": 0.2816593050956726 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.2970641255378723, + "learning_rate": 1.544021974844586e-06, + "loss": 0.2986, + "step": 356, + "teacher_loss": 0.2987341284751892 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.4337921738624573, + "learning_rate": 1.548359115223363e-06, + "loss": 0.4537, + "step": 357, + "teacher_loss": 0.4558786451816559 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.2025834023952484, + "learning_rate": 1.5526962556021397e-06, + "loss": 0.2338, + "step": 358, + "teacher_loss": 0.23727522790431976 + }, + { + "compression_loss": 0.0, + "epoch": 0.06, + "label_loss": 0.2037421315908432, + "learning_rate": 1.5570333959809167e-06, + "loss": 0.2828, + "step": 359, + "teacher_loss": 0.29158878326416016 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.6193740367889404, + "learning_rate": 1.5613705363596934e-06, + "loss": 0.3379, + "step": 360, + "teacher_loss": 0.3066261410713196 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.721538782119751, + "learning_rate": 1.5657076767384704e-06, + "loss": 0.4034, + "step": 361, + "teacher_loss": 0.36805254220962524 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.7160407304763794, + "learning_rate": 1.5700448171172474e-06, + "loss": 0.29, + "step": 362, + "teacher_loss": 0.24262619018554688 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.37309473752975464, + "learning_rate": 1.5743819574960244e-06, + "loss": 0.2789, + "step": 363, + "teacher_loss": 0.2683855891227722 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.42594701051712036, + "learning_rate": 1.5787190978748012e-06, + "loss": 0.3065, + "step": 364, + "teacher_loss": 0.293210506439209 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.40625637769699097, + "learning_rate": 1.5830562382535782e-06, + "loss": 0.2443, + "step": 365, + "teacher_loss": 0.22632664442062378 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.24801987409591675, + "learning_rate": 1.587393378632355e-06, + "loss": 0.2364, + "step": 366, + "teacher_loss": 0.23507001996040344 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.3892616033554077, + "learning_rate": 1.5917305190111321e-06, + "loss": 0.2747, + "step": 367, + "teacher_loss": 0.2619546949863434 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.2886156141757965, + "learning_rate": 1.596067659389909e-06, + "loss": 0.2641, + "step": 368, + "teacher_loss": 0.26142188906669617 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.5245214700698853, + "learning_rate": 1.600404799768686e-06, + "loss": 0.2673, + "step": 369, + "teacher_loss": 0.2386949360370636 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.38306134939193726, + "learning_rate": 1.6047419401474629e-06, + "loss": 0.4083, + "step": 370, + "teacher_loss": 0.4110714793205261 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.4856521785259247, + "learning_rate": 1.6090790805262397e-06, + "loss": 0.2535, + "step": 371, + "teacher_loss": 0.2276982069015503 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.3180727958679199, + "learning_rate": 1.6134162209050169e-06, + "loss": 0.2837, + "step": 372, + "teacher_loss": 0.2799244523048401 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.3749374747276306, + "learning_rate": 1.6177533612837936e-06, + "loss": 0.2494, + "step": 373, + "teacher_loss": 0.2354069948196411 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.4214995801448822, + "learning_rate": 1.6220905016625706e-06, + "loss": 0.2711, + "step": 374, + "teacher_loss": 0.2543748617172241 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.40043288469314575, + "learning_rate": 1.6264276420413474e-06, + "loss": 0.2743, + "step": 375, + "teacher_loss": 0.26031285524368286 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 1.037966251373291, + "learning_rate": 1.6307647824201244e-06, + "loss": 0.3784, + "step": 376, + "teacher_loss": 0.30508655309677124 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.33204352855682373, + "learning_rate": 1.6351019227989012e-06, + "loss": 0.4159, + "step": 377, + "teacher_loss": 0.4251980185508728 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.19326746463775635, + "learning_rate": 1.6394390631776784e-06, + "loss": 0.2566, + "step": 378, + "teacher_loss": 0.26364821195602417 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.20111048221588135, + "learning_rate": 1.6437762035564551e-06, + "loss": 0.196, + "step": 379, + "teacher_loss": 0.19547748565673828 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.15460160374641418, + "learning_rate": 1.6481133439352321e-06, + "loss": 0.2378, + "step": 380, + "teacher_loss": 0.24707093834877014 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.5643184185028076, + "learning_rate": 1.6524504843140089e-06, + "loss": 0.5085, + "step": 381, + "teacher_loss": 0.5023406744003296 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.46036678552627563, + "learning_rate": 1.6567876246927859e-06, + "loss": 0.2234, + "step": 382, + "teacher_loss": 0.1970236748456955 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.393317848443985, + "learning_rate": 1.6611247650715629e-06, + "loss": 0.3106, + "step": 383, + "teacher_loss": 0.3013565242290497 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.3466640114784241, + "learning_rate": 1.6654619054503399e-06, + "loss": 0.4081, + "step": 384, + "teacher_loss": 0.41491347551345825 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.42049169540405273, + "learning_rate": 1.6697990458291166e-06, + "loss": 0.2571, + "step": 385, + "teacher_loss": 0.23896369338035583 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.4995405375957489, + "learning_rate": 1.6741361862078936e-06, + "loss": 0.3218, + "step": 386, + "teacher_loss": 0.3020709156990051 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.3334900736808777, + "learning_rate": 1.6784733265866706e-06, + "loss": 0.2438, + "step": 387, + "teacher_loss": 0.23387368023395538 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.41797807812690735, + "learning_rate": 1.6828104669654474e-06, + "loss": 0.394, + "step": 388, + "teacher_loss": 0.39138734340667725 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.38038504123687744, + "learning_rate": 1.6871476073442246e-06, + "loss": 0.2249, + "step": 389, + "teacher_loss": 0.20758700370788574 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.5758942365646362, + "learning_rate": 1.6914847477230013e-06, + "loss": 0.6417, + "step": 390, + "teacher_loss": 0.6490577459335327 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.5764553546905518, + "learning_rate": 1.6958218881017783e-06, + "loss": 0.3365, + "step": 391, + "teacher_loss": 0.30982881784439087 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.408348023891449, + "learning_rate": 1.7001590284805551e-06, + "loss": 0.2754, + "step": 392, + "teacher_loss": 0.2605964243412018 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.6151769161224365, + "learning_rate": 1.704496168859332e-06, + "loss": 0.3425, + "step": 393, + "teacher_loss": 0.31217846274375916 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.23972713947296143, + "learning_rate": 1.708833309238109e-06, + "loss": 0.2535, + "step": 394, + "teacher_loss": 0.25504136085510254 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.16879598796367645, + "learning_rate": 1.713170449616886e-06, + "loss": 0.2167, + "step": 395, + "teacher_loss": 0.2219766527414322 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.26204800605773926, + "learning_rate": 1.7175075899956628e-06, + "loss": 0.2036, + "step": 396, + "teacher_loss": 0.197129487991333 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.43005844950675964, + "learning_rate": 1.7218447303744398e-06, + "loss": 0.2262, + "step": 397, + "teacher_loss": 0.20354320108890533 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.3438878655433655, + "learning_rate": 1.7261818707532166e-06, + "loss": 0.2442, + "step": 398, + "teacher_loss": 0.23312923312187195 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.38927507400512695, + "learning_rate": 1.7305190111319936e-06, + "loss": 0.1928, + "step": 399, + "teacher_loss": 0.17091867327690125 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.3770056962966919, + "learning_rate": 1.7348561515107706e-06, + "loss": 0.27, + "step": 400, + "teacher_loss": 0.25815635919570923 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.7730339765548706, + "learning_rate": 1.7391932918895476e-06, + "loss": 0.3726, + "step": 401, + "teacher_loss": 0.3281252384185791 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.4280671775341034, + "learning_rate": 1.7435304322683246e-06, + "loss": 0.2508, + "step": 402, + "teacher_loss": 0.23111285269260406 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.20550131797790527, + "learning_rate": 1.7478675726471013e-06, + "loss": 0.2005, + "step": 403, + "teacher_loss": 0.19990915060043335 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.48787587881088257, + "learning_rate": 1.7522047130258783e-06, + "loss": 0.4844, + "step": 404, + "teacher_loss": 0.48398470878601074 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.2736685574054718, + "learning_rate": 1.7565418534046553e-06, + "loss": 0.4097, + "step": 405, + "teacher_loss": 0.4247642755508423 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.5428891777992249, + "learning_rate": 1.7608789937834323e-06, + "loss": 0.3313, + "step": 406, + "teacher_loss": 0.3077879250049591 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.2727331519126892, + "learning_rate": 1.765216134162209e-06, + "loss": 0.2919, + "step": 407, + "teacher_loss": 0.29404160380363464 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.330168753862381, + "learning_rate": 1.769553274540986e-06, + "loss": 0.2686, + "step": 408, + "teacher_loss": 0.26173800230026245 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.5277129411697388, + "learning_rate": 1.7738904149197628e-06, + "loss": 0.4467, + "step": 409, + "teacher_loss": 0.4377474784851074 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.3135142922401428, + "learning_rate": 1.77822755529854e-06, + "loss": 0.2187, + "step": 410, + "teacher_loss": 0.20820513367652893 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.2130378931760788, + "learning_rate": 1.7825646956773168e-06, + "loss": 0.2234, + "step": 411, + "teacher_loss": 0.22456574440002441 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.7675861120223999, + "learning_rate": 1.7869018360560938e-06, + "loss": 0.3381, + "step": 412, + "teacher_loss": 0.2903895974159241 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.4666363596916199, + "learning_rate": 1.7912389764348706e-06, + "loss": 0.2097, + "step": 413, + "teacher_loss": 0.18114443123340607 + }, + { + "compression_loss": 0.0, + "epoch": 0.07, + "label_loss": 0.5393398404121399, + "learning_rate": 1.7955761168136475e-06, + "loss": 0.2752, + "step": 414, + "teacher_loss": 0.24581646919250488 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.29600080847740173, + "learning_rate": 1.7999132571924243e-06, + "loss": 0.2741, + "step": 415, + "teacher_loss": 0.2717086672782898 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.3500150144100189, + "learning_rate": 1.8042503975712015e-06, + "loss": 0.2643, + "step": 416, + "teacher_loss": 0.25477874279022217 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.5084381699562073, + "learning_rate": 1.8085875379499783e-06, + "loss": 0.363, + "step": 417, + "teacher_loss": 0.3468474745750427 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.49075108766555786, + "learning_rate": 1.8129246783287553e-06, + "loss": 0.3361, + "step": 418, + "teacher_loss": 0.3189225196838379 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.327508807182312, + "learning_rate": 1.8172618187075323e-06, + "loss": 0.2164, + "step": 419, + "teacher_loss": 0.2040756493806839 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 1.1091556549072266, + "learning_rate": 1.821598959086309e-06, + "loss": 0.4772, + "step": 420, + "teacher_loss": 0.4069884717464447 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.6659070253372192, + "learning_rate": 1.8259360994650862e-06, + "loss": 0.3733, + "step": 421, + "teacher_loss": 0.34075677394866943 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.34834855794906616, + "learning_rate": 1.830273239843863e-06, + "loss": 0.2507, + "step": 422, + "teacher_loss": 0.23987625539302826 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.6248312592506409, + "learning_rate": 1.83461038022264e-06, + "loss": 0.2723, + "step": 423, + "teacher_loss": 0.233115553855896 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.4233854413032532, + "learning_rate": 1.8389475206014168e-06, + "loss": 0.195, + "step": 424, + "teacher_loss": 0.16966009140014648 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.21375705301761627, + "learning_rate": 1.8432846609801938e-06, + "loss": 0.273, + "step": 425, + "teacher_loss": 0.27958256006240845 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.4494283199310303, + "learning_rate": 1.8476218013589705e-06, + "loss": 0.2123, + "step": 426, + "teacher_loss": 0.18590784072875977 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.6090162396430969, + "learning_rate": 1.8519589417377477e-06, + "loss": 0.3485, + "step": 427, + "teacher_loss": 0.319607675075531 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.44910502433776855, + "learning_rate": 1.8562960821165245e-06, + "loss": 0.3034, + "step": 428, + "teacher_loss": 0.28720515966415405 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.18220269680023193, + "learning_rate": 1.8606332224953015e-06, + "loss": 0.2636, + "step": 429, + "teacher_loss": 0.2726060748100281 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.6217266917228699, + "learning_rate": 1.8649703628740783e-06, + "loss": 0.2855, + "step": 430, + "teacher_loss": 0.2481021285057068 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.2947242856025696, + "learning_rate": 1.8693075032528553e-06, + "loss": 0.3036, + "step": 431, + "teacher_loss": 0.304562509059906 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.7192350625991821, + "learning_rate": 1.8736446436316323e-06, + "loss": 0.2519, + "step": 432, + "teacher_loss": 0.199924498796463 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.24471047520637512, + "learning_rate": 1.8779817840104092e-06, + "loss": 0.2277, + "step": 433, + "teacher_loss": 0.2258528470993042 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.2768610715866089, + "learning_rate": 1.8823189243891862e-06, + "loss": 0.2634, + "step": 434, + "teacher_loss": 0.26193076372146606 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.44071686267852783, + "learning_rate": 1.886656064767963e-06, + "loss": 0.2564, + "step": 435, + "teacher_loss": 0.2359606921672821 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.22673246264457703, + "learning_rate": 1.89099320514674e-06, + "loss": 0.3108, + "step": 436, + "teacher_loss": 0.32010000944137573 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.9465312361717224, + "learning_rate": 1.8953303455255168e-06, + "loss": 0.7238, + "step": 437, + "teacher_loss": 0.6990618705749512 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.7088655233383179, + "learning_rate": 1.8996674859042937e-06, + "loss": 0.2849, + "step": 438, + "teacher_loss": 0.23781141638755798 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.32122692465782166, + "learning_rate": 1.9040046262830705e-06, + "loss": 0.2542, + "step": 439, + "teacher_loss": 0.2468041330575943 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.19299942255020142, + "learning_rate": 1.908341766661848e-06, + "loss": 0.2949, + "step": 440, + "teacher_loss": 0.3062227964401245 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.32944902777671814, + "learning_rate": 1.9126789070406247e-06, + "loss": 0.3241, + "step": 441, + "teacher_loss": 0.32347506284713745 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.19571147859096527, + "learning_rate": 1.9170160474194015e-06, + "loss": 0.2531, + "step": 442, + "teacher_loss": 0.2594839930534363 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.2997242212295532, + "learning_rate": 1.9213531877981787e-06, + "loss": 0.2433, + "step": 443, + "teacher_loss": 0.2370450645685196 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.2977994680404663, + "learning_rate": 1.9256903281769555e-06, + "loss": 0.5177, + "step": 444, + "teacher_loss": 0.5421225428581238 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.5032090544700623, + "learning_rate": 1.9300274685557322e-06, + "loss": 0.2675, + "step": 445, + "teacher_loss": 0.2413441240787506 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.37059757113456726, + "learning_rate": 1.934364608934509e-06, + "loss": 0.1822, + "step": 446, + "teacher_loss": 0.16121342778205872 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.24433265626430511, + "learning_rate": 1.938701749313286e-06, + "loss": 0.2393, + "step": 447, + "teacher_loss": 0.23873546719551086 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.5554165840148926, + "learning_rate": 1.943038889692063e-06, + "loss": 0.2665, + "step": 448, + "teacher_loss": 0.23443107306957245 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.66746985912323, + "learning_rate": 1.9473760300708398e-06, + "loss": 0.4268, + "step": 449, + "teacher_loss": 0.40007448196411133 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.31764930486679077, + "learning_rate": 1.951713170449617e-06, + "loss": 0.2901, + "step": 450, + "teacher_loss": 0.2870023548603058 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.37764155864715576, + "learning_rate": 1.956050310828394e-06, + "loss": 0.2791, + "step": 451, + "teacher_loss": 0.2681971490383148 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.43080389499664307, + "learning_rate": 1.960387451207171e-06, + "loss": 0.2807, + "step": 452, + "teacher_loss": 0.2640213370323181 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.47938570380210876, + "learning_rate": 1.9647245915859477e-06, + "loss": 0.6168, + "step": 453, + "teacher_loss": 0.6321121454238892 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.32987815141677856, + "learning_rate": 1.969061731964725e-06, + "loss": 0.1858, + "step": 454, + "teacher_loss": 0.16981291770935059 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.5893688201904297, + "learning_rate": 1.9733988723435017e-06, + "loss": 0.2976, + "step": 455, + "teacher_loss": 0.26523154973983765 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.4144860804080963, + "learning_rate": 1.9777360127222785e-06, + "loss": 0.2705, + "step": 456, + "teacher_loss": 0.2545433044433594 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.9025605916976929, + "learning_rate": 1.9820731531010552e-06, + "loss": 0.4697, + "step": 457, + "teacher_loss": 0.42164015769958496 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.4072743058204651, + "learning_rate": 1.9864102934798324e-06, + "loss": 0.291, + "step": 458, + "teacher_loss": 0.2780720293521881 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.36306989192962646, + "learning_rate": 1.990747433858609e-06, + "loss": 0.2436, + "step": 459, + "teacher_loss": 0.23030637204647064 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.5169907212257385, + "learning_rate": 1.995084574237386e-06, + "loss": 0.3379, + "step": 460, + "teacher_loss": 0.3180461823940277 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.6494100093841553, + "learning_rate": 1.9994217146161628e-06, + "loss": 0.2999, + "step": 461, + "teacher_loss": 0.2610490620136261 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.3072289824485779, + "learning_rate": 2.0037588549949404e-06, + "loss": 0.2342, + "step": 462, + "teacher_loss": 0.22611698508262634 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.3994767665863037, + "learning_rate": 2.008095995373717e-06, + "loss": 0.3317, + "step": 463, + "teacher_loss": 0.32413503527641296 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.4888478219509125, + "learning_rate": 2.012433135752494e-06, + "loss": 0.2647, + "step": 464, + "teacher_loss": 0.23980139195919037 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.4228232800960541, + "learning_rate": 2.0167702761312707e-06, + "loss": 0.2307, + "step": 465, + "teacher_loss": 0.2093290090560913 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.629534125328064, + "learning_rate": 2.021107416510048e-06, + "loss": 0.2641, + "step": 466, + "teacher_loss": 0.22347673773765564 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.7569173574447632, + "learning_rate": 2.0254445568888247e-06, + "loss": 0.348, + "step": 467, + "teacher_loss": 0.3025675415992737 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.4804191589355469, + "learning_rate": 2.0297816972676014e-06, + "loss": 0.2235, + "step": 468, + "teacher_loss": 0.19495120644569397 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.2551216185092926, + "learning_rate": 2.0341188376463786e-06, + "loss": 0.2386, + "step": 469, + "teacher_loss": 0.23679262399673462 + }, + { + "compression_loss": 0.0, + "epoch": 0.08, + "label_loss": 0.2082975208759308, + "learning_rate": 2.0384559780251554e-06, + "loss": 0.1883, + "step": 470, + "teacher_loss": 0.18602964282035828 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.2937382757663727, + "learning_rate": 2.042793118403932e-06, + "loss": 0.2065, + "step": 471, + "teacher_loss": 0.19685763120651245 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.18604007363319397, + "learning_rate": 2.047130258782709e-06, + "loss": 0.4785, + "step": 472, + "teacher_loss": 0.5109646320343018 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.2973581850528717, + "learning_rate": 2.0514673991614866e-06, + "loss": 0.2446, + "step": 473, + "teacher_loss": 0.23872309923171997 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.5202877521514893, + "learning_rate": 2.0558045395402634e-06, + "loss": 0.2232, + "step": 474, + "teacher_loss": 0.19023792445659637 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.5331915616989136, + "learning_rate": 2.06014167991904e-06, + "loss": 0.2618, + "step": 475, + "teacher_loss": 0.2316759079694748 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.277387797832489, + "learning_rate": 2.064478820297817e-06, + "loss": 0.206, + "step": 476, + "teacher_loss": 0.19810503721237183 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.486581027507782, + "learning_rate": 2.068815960676594e-06, + "loss": 0.487, + "step": 477, + "teacher_loss": 0.4870527982711792 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.38122931122779846, + "learning_rate": 2.073153101055371e-06, + "loss": 0.2039, + "step": 478, + "teacher_loss": 0.18414457142353058 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.2526359558105469, + "learning_rate": 2.0774902414341477e-06, + "loss": 0.2823, + "step": 479, + "teacher_loss": 0.28556084632873535 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.22659647464752197, + "learning_rate": 2.0818273818129244e-06, + "loss": 0.2134, + "step": 480, + "teacher_loss": 0.21197384595870972 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.1992442011833191, + "learning_rate": 2.0861645221917016e-06, + "loss": 0.1937, + "step": 481, + "teacher_loss": 0.19306373596191406 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.5664013624191284, + "learning_rate": 2.0905016625704784e-06, + "loss": 0.3027, + "step": 482, + "teacher_loss": 0.27343302965164185 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.3245323896408081, + "learning_rate": 2.094838802949255e-06, + "loss": 0.2011, + "step": 483, + "teacher_loss": 0.18735739588737488 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.9848895072937012, + "learning_rate": 2.099175943328033e-06, + "loss": 0.4683, + "step": 484, + "teacher_loss": 0.4109257757663727 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.5511682629585266, + "learning_rate": 2.1035130837068096e-06, + "loss": 0.5622, + "step": 485, + "teacher_loss": 0.5634675025939941 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.593747615814209, + "learning_rate": 2.1078502240855864e-06, + "loss": 0.204, + "step": 486, + "teacher_loss": 0.16068899631500244 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.2801670432090759, + "learning_rate": 2.112187364464363e-06, + "loss": 0.2083, + "step": 487, + "teacher_loss": 0.20030780136585236 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.3808116912841797, + "learning_rate": 2.1165245048431403e-06, + "loss": 0.2938, + "step": 488, + "teacher_loss": 0.2841397523880005 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.27547040581703186, + "learning_rate": 2.120861645221917e-06, + "loss": 0.2648, + "step": 489, + "teacher_loss": 0.26362812519073486 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.7601871490478516, + "learning_rate": 2.125198785600694e-06, + "loss": 0.3349, + "step": 490, + "teacher_loss": 0.2876013517379761 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.3447526693344116, + "learning_rate": 2.1295359259794707e-06, + "loss": 0.1761, + "step": 491, + "teacher_loss": 0.1573595404624939 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.4270593523979187, + "learning_rate": 2.133873066358248e-06, + "loss": 0.45, + "step": 492, + "teacher_loss": 0.45252642035484314 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.4965693950653076, + "learning_rate": 2.1382102067370246e-06, + "loss": 0.2625, + "step": 493, + "teacher_loss": 0.23646843433380127 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.4682677090167999, + "learning_rate": 2.1425473471158014e-06, + "loss": 0.2653, + "step": 494, + "teacher_loss": 0.2427406907081604 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.18518725037574768, + "learning_rate": 2.1468844874945786e-06, + "loss": 0.2164, + "step": 495, + "teacher_loss": 0.21982312202453613 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.39510196447372437, + "learning_rate": 2.151221627873356e-06, + "loss": 0.2742, + "step": 496, + "teacher_loss": 0.2608032822608948 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.23207172751426697, + "learning_rate": 2.1555587682521326e-06, + "loss": 0.3791, + "step": 497, + "teacher_loss": 0.3954426050186157 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.711067795753479, + "learning_rate": 2.1598959086309094e-06, + "loss": 0.28, + "step": 498, + "teacher_loss": 0.2320762425661087 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 1.173708438873291, + "learning_rate": 2.1642330490096866e-06, + "loss": 0.3219, + "step": 499, + "teacher_loss": 0.22727572917938232 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.5813969373703003, + "learning_rate": 2.1685701893884633e-06, + "loss": 0.2299, + "step": 500, + "teacher_loss": 0.19085891544818878 + }, + { + "epoch": 0.09, + "eval_exact_match": 79.93377483443709, + "eval_f1": 87.34056773477343, + "step": 500 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.2771046459674835, + "learning_rate": 2.17290732976724e-06, + "loss": 0.3274, + "step": 501, + "teacher_loss": 0.33294737339019775 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.22974810004234314, + "learning_rate": 2.177244470146017e-06, + "loss": 0.2027, + "step": 502, + "teacher_loss": 0.19970601797103882 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.6079027652740479, + "learning_rate": 2.181581610524794e-06, + "loss": 0.2981, + "step": 503, + "teacher_loss": 0.263627827167511 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.39554834365844727, + "learning_rate": 2.185918750903571e-06, + "loss": 0.2559, + "step": 504, + "teacher_loss": 0.2404191792011261 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.578843891620636, + "learning_rate": 2.190255891282348e-06, + "loss": 0.2453, + "step": 505, + "teacher_loss": 0.20824384689331055 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.5476224422454834, + "learning_rate": 2.194593031661125e-06, + "loss": 0.3513, + "step": 506, + "teacher_loss": 0.32946211099624634 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.29302459955215454, + "learning_rate": 2.198930172039902e-06, + "loss": 0.2339, + "step": 507, + "teacher_loss": 0.22737549245357513 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.895563006401062, + "learning_rate": 2.203267312418679e-06, + "loss": 0.2782, + "step": 508, + "teacher_loss": 0.20957829058170319 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.4441918432712555, + "learning_rate": 2.2076044527974556e-06, + "loss": 0.3246, + "step": 509, + "teacher_loss": 0.3112950325012207 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.2914983630180359, + "learning_rate": 2.2119415931762324e-06, + "loss": 0.2769, + "step": 510, + "teacher_loss": 0.2753213942050934 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.544796347618103, + "learning_rate": 2.2162787335550096e-06, + "loss": 0.277, + "step": 511, + "teacher_loss": 0.24726511538028717 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.5013731122016907, + "learning_rate": 2.2206158739337863e-06, + "loss": 0.2505, + "step": 512, + "teacher_loss": 0.22265717387199402 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.3373941481113434, + "learning_rate": 2.224953014312563e-06, + "loss": 0.3402, + "step": 513, + "teacher_loss": 0.340520977973938 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.32549047470092773, + "learning_rate": 2.2292901546913403e-06, + "loss": 0.2519, + "step": 514, + "teacher_loss": 0.2436991184949875 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.43331462144851685, + "learning_rate": 2.233627295070117e-06, + "loss": 0.3781, + "step": 515, + "teacher_loss": 0.3719749450683594 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.48328661918640137, + "learning_rate": 2.2379644354488943e-06, + "loss": 0.2191, + "step": 516, + "teacher_loss": 0.18973436951637268 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.4888170063495636, + "learning_rate": 2.242301575827671e-06, + "loss": 0.2753, + "step": 517, + "teacher_loss": 0.25160840153694153 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.18766647577285767, + "learning_rate": 2.2466387162064482e-06, + "loss": 0.2615, + "step": 518, + "teacher_loss": 0.26968494057655334 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.9773036241531372, + "learning_rate": 2.250975856585225e-06, + "loss": 0.3672, + "step": 519, + "teacher_loss": 0.2994511127471924 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.1253749430179596, + "learning_rate": 2.255312996964002e-06, + "loss": 0.2172, + "step": 520, + "teacher_loss": 0.2274502068758011 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.9317730069160461, + "learning_rate": 2.2596501373427786e-06, + "loss": 0.4157, + "step": 521, + "teacher_loss": 0.3583186864852905 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.19191187620162964, + "learning_rate": 2.2639872777215558e-06, + "loss": 0.3742, + "step": 522, + "teacher_loss": 0.39444875717163086 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.16487179696559906, + "learning_rate": 2.2683244181003325e-06, + "loss": 0.1609, + "step": 523, + "teacher_loss": 0.16045315563678741 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.735713005065918, + "learning_rate": 2.2726615584791093e-06, + "loss": 0.295, + "step": 524, + "teacher_loss": 0.24605339765548706 + }, + { + "compression_loss": 0.0, + "epoch": 0.09, + "label_loss": 0.9322723150253296, + "learning_rate": 2.276998698857886e-06, + "loss": 0.5034, + "step": 525, + "teacher_loss": 0.4557216167449951 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.3997858166694641, + "learning_rate": 2.2813358392366633e-06, + "loss": 0.2411, + "step": 526, + "teacher_loss": 0.2234981507062912 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.5708207488059998, + "learning_rate": 2.2856729796154405e-06, + "loss": 0.2833, + "step": 527, + "teacher_loss": 0.25130024552345276 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.5658714771270752, + "learning_rate": 2.2900101199942173e-06, + "loss": 0.3149, + "step": 528, + "teacher_loss": 0.28703922033309937 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.5094200372695923, + "learning_rate": 2.294347260372994e-06, + "loss": 0.3207, + "step": 529, + "teacher_loss": 0.29976028203964233 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.5431407690048218, + "learning_rate": 2.2986844007517712e-06, + "loss": 0.3439, + "step": 530, + "teacher_loss": 0.3217581510543823 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.19011594355106354, + "learning_rate": 2.303021541130548e-06, + "loss": 0.2466, + "step": 531, + "teacher_loss": 0.2528422772884369 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.512394368648529, + "learning_rate": 2.307358681509325e-06, + "loss": 0.343, + "step": 532, + "teacher_loss": 0.3241584002971649 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.4413803815841675, + "learning_rate": 2.311695821888102e-06, + "loss": 0.2907, + "step": 533, + "teacher_loss": 0.2739686369895935 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.6413787007331848, + "learning_rate": 2.3160329622668788e-06, + "loss": 0.3494, + "step": 534, + "teacher_loss": 0.31693750619888306 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.43459856510162354, + "learning_rate": 2.3203701026456555e-06, + "loss": 0.2386, + "step": 535, + "teacher_loss": 0.2168196439743042 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.49601468443870544, + "learning_rate": 2.3247072430244323e-06, + "loss": 0.3205, + "step": 536, + "teacher_loss": 0.3009607791900635 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.6900720596313477, + "learning_rate": 2.3290443834032095e-06, + "loss": 0.291, + "step": 537, + "teacher_loss": 0.24667127430438995 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.31261855363845825, + "learning_rate": 2.3333815237819867e-06, + "loss": 0.2045, + "step": 538, + "teacher_loss": 0.19246099889278412 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.8484382629394531, + "learning_rate": 2.3377186641607635e-06, + "loss": 0.4827, + "step": 539, + "teacher_loss": 0.4420757591724396 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.4161158502101898, + "learning_rate": 2.3420558045395403e-06, + "loss": 0.2889, + "step": 540, + "teacher_loss": 0.27474966645240784 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.425876647233963, + "learning_rate": 2.3463929449183175e-06, + "loss": 0.233, + "step": 541, + "teacher_loss": 0.2116113007068634 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.42528918385505676, + "learning_rate": 2.3507300852970942e-06, + "loss": 0.2988, + "step": 542, + "teacher_loss": 0.28477996587753296 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.6934197545051575, + "learning_rate": 2.355067225675871e-06, + "loss": 0.3753, + "step": 543, + "teacher_loss": 0.33992424607276917 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.42282840609550476, + "learning_rate": 2.3594043660546478e-06, + "loss": 0.2656, + "step": 544, + "teacher_loss": 0.24810999631881714 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.3808443248271942, + "learning_rate": 2.363741506433425e-06, + "loss": 0.2999, + "step": 545, + "teacher_loss": 0.2908857464790344 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.46012943983078003, + "learning_rate": 2.3680786468122018e-06, + "loss": 0.3068, + "step": 546, + "teacher_loss": 0.28980737924575806 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.7822651863098145, + "learning_rate": 2.3724157871909785e-06, + "loss": 0.3352, + "step": 547, + "teacher_loss": 0.2855678200721741 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.23164251446723938, + "learning_rate": 2.3767529275697557e-06, + "loss": 0.1892, + "step": 548, + "teacher_loss": 0.18445700407028198 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.22667209804058075, + "learning_rate": 2.381090067948533e-06, + "loss": 0.2639, + "step": 549, + "teacher_loss": 0.2680549621582031 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 1.0708098411560059, + "learning_rate": 2.3854272083273097e-06, + "loss": 0.5789, + "step": 550, + "teacher_loss": 0.5242176055908203 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.45995640754699707, + "learning_rate": 2.3897643487060865e-06, + "loss": 0.4241, + "step": 551, + "teacher_loss": 0.42016488313674927 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.52562415599823, + "learning_rate": 2.3941014890848637e-06, + "loss": 0.2929, + "step": 552, + "teacher_loss": 0.2669890522956848 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.2499690055847168, + "learning_rate": 2.3984386294636405e-06, + "loss": 0.2479, + "step": 553, + "teacher_loss": 0.24763701856136322 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.24137386679649353, + "learning_rate": 2.4027757698424172e-06, + "loss": 0.2254, + "step": 554, + "teacher_loss": 0.2235698699951172 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.3888161778450012, + "learning_rate": 2.407112910221194e-06, + "loss": 0.2376, + "step": 555, + "teacher_loss": 0.22075967490673065 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.31894826889038086, + "learning_rate": 2.411450050599971e-06, + "loss": 0.2192, + "step": 556, + "teacher_loss": 0.20806646347045898 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.248812735080719, + "learning_rate": 2.415787190978748e-06, + "loss": 0.2175, + "step": 557, + "teacher_loss": 0.21404403448104858 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.34315747022628784, + "learning_rate": 2.4201243313575248e-06, + "loss": 0.2403, + "step": 558, + "teacher_loss": 0.22885122895240784 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.4260071814060211, + "learning_rate": 2.4244614717363015e-06, + "loss": 0.2778, + "step": 559, + "teacher_loss": 0.2613573670387268 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.3047531247138977, + "learning_rate": 2.428798612115079e-06, + "loss": 0.1531, + "step": 560, + "teacher_loss": 0.13628603518009186 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.39074549078941345, + "learning_rate": 2.433135752493856e-06, + "loss": 0.3214, + "step": 561, + "teacher_loss": 0.3136478662490845 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.332958459854126, + "learning_rate": 2.4374728928726327e-06, + "loss": 0.1695, + "step": 562, + "teacher_loss": 0.15136626362800598 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.27515649795532227, + "learning_rate": 2.44181003325141e-06, + "loss": 0.209, + "step": 563, + "teacher_loss": 0.2016858160495758 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.2551475167274475, + "learning_rate": 2.4461471736301867e-06, + "loss": 0.1781, + "step": 564, + "teacher_loss": 0.16953346133232117 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.31979674100875854, + "learning_rate": 2.4504843140089635e-06, + "loss": 0.2742, + "step": 565, + "teacher_loss": 0.2690945863723755 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.5875164270401001, + "learning_rate": 2.4548214543877402e-06, + "loss": 0.2876, + "step": 566, + "teacher_loss": 0.2542589604854584 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.7272858619689941, + "learning_rate": 2.4591585947665174e-06, + "loss": 0.3647, + "step": 567, + "teacher_loss": 0.32438141107559204 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.3112044334411621, + "learning_rate": 2.463495735145294e-06, + "loss": 0.3472, + "step": 568, + "teacher_loss": 0.3511584997177124 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.35305026173591614, + "learning_rate": 2.467832875524071e-06, + "loss": 0.2094, + "step": 569, + "teacher_loss": 0.19340015947818756 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.24011370539665222, + "learning_rate": 2.4721700159028477e-06, + "loss": 0.2487, + "step": 570, + "teacher_loss": 0.24964269995689392 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.7539844512939453, + "learning_rate": 2.4765071562816254e-06, + "loss": 0.2901, + "step": 571, + "teacher_loss": 0.23853300511837006 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.4385092258453369, + "learning_rate": 2.480844296660402e-06, + "loss": 0.2123, + "step": 572, + "teacher_loss": 0.1871197521686554 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.2902497351169586, + "learning_rate": 2.485181437039179e-06, + "loss": 0.1961, + "step": 573, + "teacher_loss": 0.18561913073062897 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.3843946158885956, + "learning_rate": 2.4895185774179557e-06, + "loss": 0.2503, + "step": 574, + "teacher_loss": 0.23541009426116943 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.9851076602935791, + "learning_rate": 2.493855717796733e-06, + "loss": 0.3746, + "step": 575, + "teacher_loss": 0.30675411224365234 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.075462706387043, + "learning_rate": 2.4981928581755097e-06, + "loss": 0.1577, + "step": 576, + "teacher_loss": 0.1667826622724533 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.47844499349594116, + "learning_rate": 2.5025299985542864e-06, + "loss": 0.5685, + "step": 577, + "teacher_loss": 0.5784728527069092 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.12621146440505981, + "learning_rate": 2.5068671389330636e-06, + "loss": 0.1968, + "step": 578, + "teacher_loss": 0.20459681749343872 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.5721890926361084, + "learning_rate": 2.5112042793118404e-06, + "loss": 0.3177, + "step": 579, + "teacher_loss": 0.28942903876304626 + }, + { + "compression_loss": 0.0, + "epoch": 0.1, + "label_loss": 0.3502286374568939, + "learning_rate": 2.515541419690617e-06, + "loss": 0.2811, + "step": 580, + "teacher_loss": 0.273428738117218 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.3754081130027771, + "learning_rate": 2.5198785600693944e-06, + "loss": 0.2641, + "step": 581, + "teacher_loss": 0.25171878933906555 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.19309651851654053, + "learning_rate": 2.5242157004481716e-06, + "loss": 0.1955, + "step": 582, + "teacher_loss": 0.19573545455932617 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.9328293204307556, + "learning_rate": 2.5285528408269484e-06, + "loss": 0.3228, + "step": 583, + "teacher_loss": 0.25504833459854126 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.4727112352848053, + "learning_rate": 2.532889981205725e-06, + "loss": 0.2623, + "step": 584, + "teacher_loss": 0.2389705628156662 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.25859129428863525, + "learning_rate": 2.537227121584502e-06, + "loss": 0.1909, + "step": 585, + "teacher_loss": 0.1833484172821045 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.4703024625778198, + "learning_rate": 2.541564261963279e-06, + "loss": 0.2628, + "step": 586, + "teacher_loss": 0.23972968757152557 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.5282617807388306, + "learning_rate": 2.545901402342056e-06, + "loss": 0.2257, + "step": 587, + "teacher_loss": 0.19205081462860107 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.6147003173828125, + "learning_rate": 2.5502385427208327e-06, + "loss": 0.4944, + "step": 588, + "teacher_loss": 0.48107486963272095 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.6036140322685242, + "learning_rate": 2.5545756830996094e-06, + "loss": 0.2811, + "step": 589, + "teacher_loss": 0.24524986743927002 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.30564770102500916, + "learning_rate": 2.5589128234783866e-06, + "loss": 0.2573, + "step": 590, + "teacher_loss": 0.2519502639770508 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.2896535098552704, + "learning_rate": 2.5632499638571634e-06, + "loss": 0.2088, + "step": 591, + "teacher_loss": 0.19982947409152985 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.29479333758354187, + "learning_rate": 2.5675871042359406e-06, + "loss": 0.1996, + "step": 592, + "teacher_loss": 0.18897177278995514 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.40671059489250183, + "learning_rate": 2.5719242446147174e-06, + "loss": 0.2508, + "step": 593, + "teacher_loss": 0.23344384133815765 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.4257102906703949, + "learning_rate": 2.5762613849934946e-06, + "loss": 0.2808, + "step": 594, + "teacher_loss": 0.2647053599357605 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.5725194215774536, + "learning_rate": 2.5805985253722714e-06, + "loss": 0.3676, + "step": 595, + "teacher_loss": 0.3448231816291809 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.2081710696220398, + "learning_rate": 2.584935665751048e-06, + "loss": 0.2166, + "step": 596, + "teacher_loss": 0.2175840586423874 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.29855772852897644, + "learning_rate": 2.5892728061298253e-06, + "loss": 0.3396, + "step": 597, + "teacher_loss": 0.3441075086593628 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.46510887145996094, + "learning_rate": 2.593609946508602e-06, + "loss": 0.3173, + "step": 598, + "teacher_loss": 0.30083948373794556 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.3710855543613434, + "learning_rate": 2.597947086887379e-06, + "loss": 0.2498, + "step": 599, + "teacher_loss": 0.2362690418958664 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.7629257440567017, + "learning_rate": 2.6022842272661557e-06, + "loss": 0.2722, + "step": 600, + "teacher_loss": 0.21764832735061646 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.36502036452293396, + "learning_rate": 2.606621367644933e-06, + "loss": 0.3853, + "step": 601, + "teacher_loss": 0.3875422477722168 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.7474969625473022, + "learning_rate": 2.6109585080237096e-06, + "loss": 0.8236, + "step": 602, + "teacher_loss": 0.832101047039032 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.616251528263092, + "learning_rate": 2.615295648402487e-06, + "loss": 0.2672, + "step": 603, + "teacher_loss": 0.22836601734161377 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.5053845643997192, + "learning_rate": 2.6196327887812636e-06, + "loss": 0.2442, + "step": 604, + "teacher_loss": 0.2151930332183838 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.8031680583953857, + "learning_rate": 2.623969929160041e-06, + "loss": 0.3599, + "step": 605, + "teacher_loss": 0.3106900453567505 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.42707711458206177, + "learning_rate": 2.6283070695388176e-06, + "loss": 0.3834, + "step": 606, + "teacher_loss": 0.3785204589366913 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.32388240098953247, + "learning_rate": 2.6326442099175944e-06, + "loss": 0.2524, + "step": 607, + "teacher_loss": 0.24450750648975372 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.8198894262313843, + "learning_rate": 2.636981350296371e-06, + "loss": 0.3624, + "step": 608, + "teacher_loss": 0.3115271031856537 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.48805996775627136, + "learning_rate": 2.6413184906751483e-06, + "loss": 0.2706, + "step": 609, + "teacher_loss": 0.24648529291152954 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.38548821210861206, + "learning_rate": 2.645655631053925e-06, + "loss": 0.207, + "step": 610, + "teacher_loss": 0.18722303211688995 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.36671972274780273, + "learning_rate": 2.649992771432702e-06, + "loss": 0.3008, + "step": 611, + "teacher_loss": 0.29345256090164185 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.5331743359565735, + "learning_rate": 2.654329911811479e-06, + "loss": 0.2645, + "step": 612, + "teacher_loss": 0.23459255695343018 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.528983473777771, + "learning_rate": 2.658667052190256e-06, + "loss": 0.4801, + "step": 613, + "teacher_loss": 0.47472190856933594 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.29589593410491943, + "learning_rate": 2.663004192569033e-06, + "loss": 0.2403, + "step": 614, + "teacher_loss": 0.23415866494178772 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.47103583812713623, + "learning_rate": 2.66734133294781e-06, + "loss": 0.314, + "step": 615, + "teacher_loss": 0.2965882420539856 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.3213235139846802, + "learning_rate": 2.671678473326587e-06, + "loss": 0.2536, + "step": 616, + "teacher_loss": 0.24612219631671906 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.3729945123195648, + "learning_rate": 2.676015613705364e-06, + "loss": 0.2534, + "step": 617, + "teacher_loss": 0.2400979995727539 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.1334015429019928, + "learning_rate": 2.6803527540841406e-06, + "loss": 0.1957, + "step": 618, + "teacher_loss": 0.20257017016410828 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.586087167263031, + "learning_rate": 2.6846898944629173e-06, + "loss": 0.2343, + "step": 619, + "teacher_loss": 0.1952415406703949 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.7722219824790955, + "learning_rate": 2.6890270348416945e-06, + "loss": 0.339, + "step": 620, + "teacher_loss": 0.29087793827056885 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.4624701738357544, + "learning_rate": 2.6933641752204713e-06, + "loss": 0.2831, + "step": 621, + "teacher_loss": 0.26322197914123535 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.49739134311676025, + "learning_rate": 2.697701315599248e-06, + "loss": 0.364, + "step": 622, + "teacher_loss": 0.3492274284362793 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.3222472071647644, + "learning_rate": 2.702038455978025e-06, + "loss": 0.2405, + "step": 623, + "teacher_loss": 0.2313927710056305 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.46010762453079224, + "learning_rate": 2.706375596356802e-06, + "loss": 0.2425, + "step": 624, + "teacher_loss": 0.21834558248519897 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.2531857192516327, + "learning_rate": 2.7107127367355793e-06, + "loss": 0.2099, + "step": 625, + "teacher_loss": 0.2051263004541397 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.11980043351650238, + "learning_rate": 2.715049877114356e-06, + "loss": 0.2235, + "step": 626, + "teacher_loss": 0.23500694334506989 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.22379769384860992, + "learning_rate": 2.7193870174931332e-06, + "loss": 0.256, + "step": 627, + "teacher_loss": 0.25958359241485596 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.9325609803199768, + "learning_rate": 2.72372415787191e-06, + "loss": 0.3507, + "step": 628, + "teacher_loss": 0.2860622704029083 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.33083322644233704, + "learning_rate": 2.728061298250687e-06, + "loss": 0.3106, + "step": 629, + "teacher_loss": 0.3083771765232086 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.5008382797241211, + "learning_rate": 2.7323984386294636e-06, + "loss": 0.2427, + "step": 630, + "teacher_loss": 0.2140641063451767 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.28264960646629333, + "learning_rate": 2.7367355790082408e-06, + "loss": 0.3993, + "step": 631, + "teacher_loss": 0.41220974922180176 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.6034846305847168, + "learning_rate": 2.7410727193870175e-06, + "loss": 0.2468, + "step": 632, + "teacher_loss": 0.2071148157119751 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.46136119961738586, + "learning_rate": 2.7454098597657943e-06, + "loss": 0.3234, + "step": 633, + "teacher_loss": 0.30802151560783386 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.4579277038574219, + "learning_rate": 2.749747000144571e-06, + "loss": 0.3117, + "step": 634, + "teacher_loss": 0.29548346996307373 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.49554747343063354, + "learning_rate": 2.7540841405233483e-06, + "loss": 0.2649, + "step": 635, + "teacher_loss": 0.23922522366046906 + }, + { + "compression_loss": 0.0, + "epoch": 0.11, + "label_loss": 0.4182722568511963, + "learning_rate": 2.7584212809021255e-06, + "loss": 0.2286, + "step": 636, + "teacher_loss": 0.20752517879009247 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.3358771800994873, + "learning_rate": 2.7627584212809023e-06, + "loss": 0.2652, + "step": 637, + "teacher_loss": 0.25736862421035767 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.3030616343021393, + "learning_rate": 2.767095561659679e-06, + "loss": 0.2204, + "step": 638, + "teacher_loss": 0.21119824051856995 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.5646266341209412, + "learning_rate": 2.7714327020384562e-06, + "loss": 0.2735, + "step": 639, + "teacher_loss": 0.2411031424999237 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.36879676580429077, + "learning_rate": 2.775769842417233e-06, + "loss": 0.232, + "step": 640, + "teacher_loss": 0.21680012345314026 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.40672093629837036, + "learning_rate": 2.78010698279601e-06, + "loss": 0.2302, + "step": 641, + "teacher_loss": 0.21060490608215332 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.49960020184516907, + "learning_rate": 2.784444123174787e-06, + "loss": 0.3092, + "step": 642, + "teacher_loss": 0.2879989445209503 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.33145076036453247, + "learning_rate": 2.7887812635535638e-06, + "loss": 0.2538, + "step": 643, + "teacher_loss": 0.24521225690841675 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.8160698413848877, + "learning_rate": 2.7931184039323405e-06, + "loss": 0.3184, + "step": 644, + "teacher_loss": 0.2630925476551056 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.5901575088500977, + "learning_rate": 2.7974555443111173e-06, + "loss": 0.271, + "step": 645, + "teacher_loss": 0.235523521900177 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.3425023555755615, + "learning_rate": 2.8017926846898945e-06, + "loss": 0.2651, + "step": 646, + "teacher_loss": 0.2564578652381897 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.24096450209617615, + "learning_rate": 2.8061298250686717e-06, + "loss": 0.2015, + "step": 647, + "teacher_loss": 0.19714361429214478 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.4299507439136505, + "learning_rate": 2.8104669654474485e-06, + "loss": 0.2061, + "step": 648, + "teacher_loss": 0.18122011423110962 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.40944671630859375, + "learning_rate": 2.8148041058262253e-06, + "loss": 0.2472, + "step": 649, + "teacher_loss": 0.229185089468956 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.30898597836494446, + "learning_rate": 2.8191412462050025e-06, + "loss": 0.2291, + "step": 650, + "teacher_loss": 0.22021184861660004 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.38677287101745605, + "learning_rate": 2.8234783865837792e-06, + "loss": 0.325, + "step": 651, + "teacher_loss": 0.3181048631668091 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.13863763213157654, + "learning_rate": 2.827815526962556e-06, + "loss": 0.1586, + "step": 652, + "teacher_loss": 0.16076484322547913 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.1925647109746933, + "learning_rate": 2.8321526673413328e-06, + "loss": 0.1833, + "step": 653, + "teacher_loss": 0.18221831321716309 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.6435182690620422, + "learning_rate": 2.83648980772011e-06, + "loss": 0.3567, + "step": 654, + "teacher_loss": 0.32485026121139526 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.5975439548492432, + "learning_rate": 2.8408269480988868e-06, + "loss": 0.2514, + "step": 655, + "teacher_loss": 0.2129727303981781 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 1.0947407484054565, + "learning_rate": 2.8451640884776635e-06, + "loss": 0.5505, + "step": 656, + "teacher_loss": 0.48999854922294617 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.48540788888931274, + "learning_rate": 2.8495012288564407e-06, + "loss": 0.3447, + "step": 657, + "teacher_loss": 0.32911601662635803 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.19058403372764587, + "learning_rate": 2.853838369235218e-06, + "loss": 0.2083, + "step": 658, + "teacher_loss": 0.21025574207305908 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.49009907245635986, + "learning_rate": 2.8581755096139947e-06, + "loss": 0.2939, + "step": 659, + "teacher_loss": 0.27211275696754456 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.44107699394226074, + "learning_rate": 2.8625126499927715e-06, + "loss": 0.3083, + "step": 660, + "teacher_loss": 0.29355186223983765 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.2993045151233673, + "learning_rate": 2.8668497903715487e-06, + "loss": 0.2565, + "step": 661, + "teacher_loss": 0.25171995162963867 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.3849508464336395, + "learning_rate": 2.8711869307503255e-06, + "loss": 0.2775, + "step": 662, + "teacher_loss": 0.26558274030685425 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.4832845628261566, + "learning_rate": 2.8755240711291022e-06, + "loss": 0.2213, + "step": 663, + "teacher_loss": 0.1921844184398651 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.21154847741127014, + "learning_rate": 2.879861211507879e-06, + "loss": 0.225, + "step": 664, + "teacher_loss": 0.22644278407096863 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.875374972820282, + "learning_rate": 2.884198351886656e-06, + "loss": 0.3541, + "step": 665, + "teacher_loss": 0.2961379289627075 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.26852554082870483, + "learning_rate": 2.888535492265433e-06, + "loss": 0.2428, + "step": 666, + "teacher_loss": 0.2399265021085739 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.9165135622024536, + "learning_rate": 2.8928726326442098e-06, + "loss": 0.3651, + "step": 667, + "teacher_loss": 0.3038022518157959 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.4079369008541107, + "learning_rate": 2.897209773022987e-06, + "loss": 0.2339, + "step": 668, + "teacher_loss": 0.21461129188537598 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.566765546798706, + "learning_rate": 2.901546913401764e-06, + "loss": 0.281, + "step": 669, + "teacher_loss": 0.2492278516292572 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.35190725326538086, + "learning_rate": 2.905884053780541e-06, + "loss": 0.201, + "step": 670, + "teacher_loss": 0.18422357738018036 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.3302437663078308, + "learning_rate": 2.9102211941593177e-06, + "loss": 0.2335, + "step": 671, + "teacher_loss": 0.222714364528656 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.4518120288848877, + "learning_rate": 2.9145583345380945e-06, + "loss": 0.2271, + "step": 672, + "teacher_loss": 0.20215407013893127 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.7353434562683105, + "learning_rate": 2.9188954749168717e-06, + "loss": 0.2333, + "step": 673, + "teacher_loss": 0.17753830552101135 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.2602654993534088, + "learning_rate": 2.9232326152956484e-06, + "loss": 0.2398, + "step": 674, + "teacher_loss": 0.2375141978263855 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.2193537950515747, + "learning_rate": 2.9275697556744252e-06, + "loss": 0.2044, + "step": 675, + "teacher_loss": 0.20269176363945007 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.3218291699886322, + "learning_rate": 2.9319068960532024e-06, + "loss": 0.2302, + "step": 676, + "teacher_loss": 0.2199709713459015 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.5002689957618713, + "learning_rate": 2.936244036431979e-06, + "loss": 0.4245, + "step": 677, + "teacher_loss": 0.41603514552116394 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.724240779876709, + "learning_rate": 2.940581176810756e-06, + "loss": 0.3331, + "step": 678, + "teacher_loss": 0.28959372639656067 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.8729182481765747, + "learning_rate": 2.944918317189533e-06, + "loss": 0.301, + "step": 679, + "teacher_loss": 0.23748816549777985 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.3842735290527344, + "learning_rate": 2.9492554575683104e-06, + "loss": 0.2296, + "step": 680, + "teacher_loss": 0.21238964796066284 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.529210090637207, + "learning_rate": 2.953592597947087e-06, + "loss": 0.2688, + "step": 681, + "teacher_loss": 0.2399199903011322 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.9025293588638306, + "learning_rate": 2.957929738325864e-06, + "loss": 0.4873, + "step": 682, + "teacher_loss": 0.4411899149417877 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.43346190452575684, + "learning_rate": 2.9622668787046407e-06, + "loss": 0.2425, + "step": 683, + "teacher_loss": 0.22132349014282227 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.615806519985199, + "learning_rate": 2.966604019083418e-06, + "loss": 0.2619, + "step": 684, + "teacher_loss": 0.2226225882768631 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.7495501041412354, + "learning_rate": 2.9709411594621947e-06, + "loss": 0.4868, + "step": 685, + "teacher_loss": 0.45756083726882935 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.22026360034942627, + "learning_rate": 2.9752782998409714e-06, + "loss": 0.3317, + "step": 686, + "teacher_loss": 0.34406578540802 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.32584649324417114, + "learning_rate": 2.9796154402197482e-06, + "loss": 0.2413, + "step": 687, + "teacher_loss": 0.23193639516830444 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.47305089235305786, + "learning_rate": 2.9839525805985254e-06, + "loss": 0.2498, + "step": 688, + "teacher_loss": 0.22497621178627014 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.4568033516407013, + "learning_rate": 2.988289720977302e-06, + "loss": 0.2522, + "step": 689, + "teacher_loss": 0.22951287031173706 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 1.03564453125, + "learning_rate": 2.9926268613560794e-06, + "loss": 0.3985, + "step": 690, + "teacher_loss": 0.3276674747467041 + }, + { + "compression_loss": 0.0, + "epoch": 0.12, + "label_loss": 0.30183354020118713, + "learning_rate": 2.9969640017348566e-06, + "loss": 0.2005, + "step": 691, + "teacher_loss": 0.18919554352760315 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 1.2837392091751099, + "learning_rate": 3.0013011421136334e-06, + "loss": 0.6489, + "step": 692, + "teacher_loss": 0.5784119963645935 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.37290507555007935, + "learning_rate": 3.00563828249241e-06, + "loss": 0.2159, + "step": 693, + "teacher_loss": 0.19843712449073792 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.39942124485969543, + "learning_rate": 3.009975422871187e-06, + "loss": 0.2365, + "step": 694, + "teacher_loss": 0.21843993663787842 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.14800088107585907, + "learning_rate": 3.014312563249964e-06, + "loss": 0.1527, + "step": 695, + "teacher_loss": 0.15322959423065186 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.5956727266311646, + "learning_rate": 3.018649703628741e-06, + "loss": 0.2825, + "step": 696, + "teacher_loss": 0.24765954911708832 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.6925480365753174, + "learning_rate": 3.0229868440075177e-06, + "loss": 0.351, + "step": 697, + "teacher_loss": 0.31300532817840576 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.26216253638267517, + "learning_rate": 3.0273239843862944e-06, + "loss": 0.249, + "step": 698, + "teacher_loss": 0.24751925468444824 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.3048938512802124, + "learning_rate": 3.0316611247650716e-06, + "loss": 0.1754, + "step": 699, + "teacher_loss": 0.16102465987205505 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.45099547505378723, + "learning_rate": 3.0359982651438484e-06, + "loss": 0.3119, + "step": 700, + "teacher_loss": 0.2964308559894562 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.6762874126434326, + "learning_rate": 3.0403354055226256e-06, + "loss": 0.2286, + "step": 701, + "teacher_loss": 0.17884431779384613 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.24709640443325043, + "learning_rate": 3.0446725459014024e-06, + "loss": 0.1938, + "step": 702, + "teacher_loss": 0.18782320618629456 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.3207492530345917, + "learning_rate": 3.0490096862801796e-06, + "loss": 0.2962, + "step": 703, + "teacher_loss": 0.2934865653514862 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.23919939994812012, + "learning_rate": 3.0533468266589564e-06, + "loss": 0.183, + "step": 704, + "teacher_loss": 0.17681071162223816 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.3603303134441376, + "learning_rate": 3.057683967037733e-06, + "loss": 0.2886, + "step": 705, + "teacher_loss": 0.28064680099487305 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.5991061925888062, + "learning_rate": 3.06202110741651e-06, + "loss": 0.2929, + "step": 706, + "teacher_loss": 0.25882866978645325 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.5482435822486877, + "learning_rate": 3.066358247795287e-06, + "loss": 0.2489, + "step": 707, + "teacher_loss": 0.21561436355113983 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.5382323861122131, + "learning_rate": 3.070695388174064e-06, + "loss": 0.2916, + "step": 708, + "teacher_loss": 0.2642223834991455 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.2919009029865265, + "learning_rate": 3.0750325285528407e-06, + "loss": 0.2967, + "step": 709, + "teacher_loss": 0.2972421944141388 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.4450782239437103, + "learning_rate": 3.079369668931618e-06, + "loss": 0.2236, + "step": 710, + "teacher_loss": 0.19893765449523926 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.5930566787719727, + "learning_rate": 3.0837068093103946e-06, + "loss": 0.2892, + "step": 711, + "teacher_loss": 0.2554784417152405 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.18835720419883728, + "learning_rate": 3.088043949689172e-06, + "loss": 0.2581, + "step": 712, + "teacher_loss": 0.2658957242965698 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.906446099281311, + "learning_rate": 3.0923810900679486e-06, + "loss": 0.3265, + "step": 713, + "teacher_loss": 0.26210784912109375 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.6868412494659424, + "learning_rate": 3.096718230446726e-06, + "loss": 0.5075, + "step": 714, + "teacher_loss": 0.48762717843055725 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.7534281611442566, + "learning_rate": 3.1010553708255026e-06, + "loss": 0.3946, + "step": 715, + "teacher_loss": 0.35474893450737 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 1.2374625205993652, + "learning_rate": 3.1053925112042794e-06, + "loss": 0.5817, + "step": 716, + "teacher_loss": 0.5088511109352112 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.5261648893356323, + "learning_rate": 3.109729651583056e-06, + "loss": 0.2943, + "step": 717, + "teacher_loss": 0.2685818076133728 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.5450520515441895, + "learning_rate": 3.1140667919618333e-06, + "loss": 0.3611, + "step": 718, + "teacher_loss": 0.3406655788421631 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.6108411550521851, + "learning_rate": 3.11840393234061e-06, + "loss": 0.2455, + "step": 719, + "teacher_loss": 0.2048671692609787 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.13773435354232788, + "learning_rate": 3.122741072719387e-06, + "loss": 0.1687, + "step": 720, + "teacher_loss": 0.17214879393577576 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.3014611601829529, + "learning_rate": 3.1270782130981637e-06, + "loss": 0.2169, + "step": 721, + "teacher_loss": 0.2075299620628357 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.5451130270957947, + "learning_rate": 3.131415353476941e-06, + "loss": 0.4403, + "step": 722, + "teacher_loss": 0.42868685722351074 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.6346304416656494, + "learning_rate": 3.135752493855718e-06, + "loss": 0.283, + "step": 723, + "teacher_loss": 0.24390821158885956 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.25397324562072754, + "learning_rate": 3.140089634234495e-06, + "loss": 0.2028, + "step": 724, + "teacher_loss": 0.19710049033164978 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.8822857737541199, + "learning_rate": 3.144426774613272e-06, + "loss": 0.2763, + "step": 725, + "teacher_loss": 0.20893266797065735 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.37545570731163025, + "learning_rate": 3.148763914992049e-06, + "loss": 0.1806, + "step": 726, + "teacher_loss": 0.1590040773153305 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.3882199227809906, + "learning_rate": 3.1531010553708256e-06, + "loss": 0.2071, + "step": 727, + "teacher_loss": 0.18702653050422668 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.6223392486572266, + "learning_rate": 3.1574381957496023e-06, + "loss": 0.5601, + "step": 728, + "teacher_loss": 0.5531485676765442 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.2948831617832184, + "learning_rate": 3.1617753361283795e-06, + "loss": 0.2305, + "step": 729, + "teacher_loss": 0.2233971804380417 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.24321597814559937, + "learning_rate": 3.1661124765071563e-06, + "loss": 0.1904, + "step": 730, + "teacher_loss": 0.18450817465782166 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.28971239924430847, + "learning_rate": 3.170449616885933e-06, + "loss": 0.2374, + "step": 731, + "teacher_loss": 0.23156000673770905 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.3926895260810852, + "learning_rate": 3.17478675726471e-06, + "loss": 0.2545, + "step": 732, + "teacher_loss": 0.23916417360305786 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.24792593717575073, + "learning_rate": 3.1791238976434875e-06, + "loss": 0.2995, + "step": 733, + "teacher_loss": 0.3052656650543213 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.7794806957244873, + "learning_rate": 3.1834610380222643e-06, + "loss": 0.2642, + "step": 734, + "teacher_loss": 0.2069830298423767 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.6176691055297852, + "learning_rate": 3.187798178401041e-06, + "loss": 0.2906, + "step": 735, + "teacher_loss": 0.2542843222618103 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.5163308382034302, + "learning_rate": 3.192135318779818e-06, + "loss": 0.2322, + "step": 736, + "teacher_loss": 0.20063847303390503 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.35234588384628296, + "learning_rate": 3.196472459158595e-06, + "loss": 0.2627, + "step": 737, + "teacher_loss": 0.25275570154190063 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.5535691976547241, + "learning_rate": 3.200809599537372e-06, + "loss": 0.2693, + "step": 738, + "teacher_loss": 0.2376859188079834 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.4028955399990082, + "learning_rate": 3.2051467399161486e-06, + "loss": 0.2317, + "step": 739, + "teacher_loss": 0.21270275115966797 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.1146426647901535, + "learning_rate": 3.2094838802949258e-06, + "loss": 0.3064, + "step": 740, + "teacher_loss": 0.32768210768699646 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.49734219908714294, + "learning_rate": 3.2138210206737025e-06, + "loss": 0.425, + "step": 741, + "teacher_loss": 0.4169650077819824 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.8750267028808594, + "learning_rate": 3.2181581610524793e-06, + "loss": 0.4349, + "step": 742, + "teacher_loss": 0.3860322833061218 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.8160274028778076, + "learning_rate": 3.222495301431256e-06, + "loss": 0.4751, + "step": 743, + "teacher_loss": 0.4372653365135193 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.562263011932373, + "learning_rate": 3.2268324418100337e-06, + "loss": 0.255, + "step": 744, + "teacher_loss": 0.22087424993515015 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.670231819152832, + "learning_rate": 3.2311695821888105e-06, + "loss": 0.3416, + "step": 745, + "teacher_loss": 0.30503684282302856 + }, + { + "compression_loss": 0.0, + "epoch": 0.13, + "label_loss": 0.23462998867034912, + "learning_rate": 3.2355067225675873e-06, + "loss": 0.2661, + "step": 746, + "teacher_loss": 0.26962926983833313 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.3502556383609772, + "learning_rate": 3.239843862946364e-06, + "loss": 0.1967, + "step": 747, + "teacher_loss": 0.17964071035385132 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.3415452837944031, + "learning_rate": 3.2441810033251412e-06, + "loss": 0.2643, + "step": 748, + "teacher_loss": 0.2556981146335602 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.47424471378326416, + "learning_rate": 3.248518143703918e-06, + "loss": 0.2529, + "step": 749, + "teacher_loss": 0.228349506855011 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.25609880685806274, + "learning_rate": 3.2528552840826948e-06, + "loss": 0.247, + "step": 750, + "teacher_loss": 0.24598433077335358 + }, + { + "epoch": 0.14, + "eval_exact_match": 79.7445600756859, + "eval_f1": 87.14584125635479, + "step": 750 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.3531723618507385, + "learning_rate": 3.2571924244614716e-06, + "loss": 0.2439, + "step": 751, + "teacher_loss": 0.2317531853914261 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.3110284209251404, + "learning_rate": 3.2615295648402488e-06, + "loss": 0.2328, + "step": 752, + "teacher_loss": 0.22415274381637573 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.42995554208755493, + "learning_rate": 3.2658667052190255e-06, + "loss": 0.2421, + "step": 753, + "teacher_loss": 0.22127076983451843 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 1.3285260200500488, + "learning_rate": 3.2702038455978023e-06, + "loss": 0.3176, + "step": 754, + "teacher_loss": 0.2053188681602478 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.7816797494888306, + "learning_rate": 3.27454098597658e-06, + "loss": 0.3153, + "step": 755, + "teacher_loss": 0.2635067105293274 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.5141528844833374, + "learning_rate": 3.2788781263553567e-06, + "loss": 0.3618, + "step": 756, + "teacher_loss": 0.34491318464279175 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.9236624240875244, + "learning_rate": 3.2832152667341335e-06, + "loss": 0.3505, + "step": 757, + "teacher_loss": 0.2868138551712036 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.6472077369689941, + "learning_rate": 3.2875524071129103e-06, + "loss": 0.2557, + "step": 758, + "teacher_loss": 0.2122364640235901 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.6403120756149292, + "learning_rate": 3.2918895474916875e-06, + "loss": 0.2963, + "step": 759, + "teacher_loss": 0.2581288516521454 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.5622844696044922, + "learning_rate": 3.2962266878704642e-06, + "loss": 0.33, + "step": 760, + "teacher_loss": 0.3041532635688782 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.5142181515693665, + "learning_rate": 3.300563828249241e-06, + "loss": 0.3063, + "step": 761, + "teacher_loss": 0.28316062688827515 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.22890210151672363, + "learning_rate": 3.3049009686280178e-06, + "loss": 0.3462, + "step": 762, + "teacher_loss": 0.35924601554870605 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.3845955431461334, + "learning_rate": 3.309238109006795e-06, + "loss": 0.3149, + "step": 763, + "teacher_loss": 0.3071707487106323 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.839165210723877, + "learning_rate": 3.3135752493855718e-06, + "loss": 0.3819, + "step": 764, + "teacher_loss": 0.33108288049697876 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.38261520862579346, + "learning_rate": 3.3179123897643485e-06, + "loss": 0.201, + "step": 765, + "teacher_loss": 0.18077123165130615 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.24388937652111053, + "learning_rate": 3.3222495301431257e-06, + "loss": 0.1753, + "step": 766, + "teacher_loss": 0.16766047477722168 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.5354130268096924, + "learning_rate": 3.326586670521903e-06, + "loss": 0.2528, + "step": 767, + "teacher_loss": 0.22134891152381897 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.5595734715461731, + "learning_rate": 3.3309238109006797e-06, + "loss": 0.2817, + "step": 768, + "teacher_loss": 0.2508693337440491 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.1843225210905075, + "learning_rate": 3.3352609512794565e-06, + "loss": 0.224, + "step": 769, + "teacher_loss": 0.22846084833145142 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.34531301259994507, + "learning_rate": 3.3395980916582333e-06, + "loss": 0.3044, + "step": 770, + "teacher_loss": 0.2998705506324768 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.3853307366371155, + "learning_rate": 3.3439352320370104e-06, + "loss": 0.3218, + "step": 771, + "teacher_loss": 0.31475961208343506 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.32597827911376953, + "learning_rate": 3.3482723724157872e-06, + "loss": 0.2461, + "step": 772, + "teacher_loss": 0.23727241158485413 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.48131901025772095, + "learning_rate": 3.352609512794564e-06, + "loss": 0.2197, + "step": 773, + "teacher_loss": 0.19060340523719788 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.9750180244445801, + "learning_rate": 3.356946653173341e-06, + "loss": 0.3523, + "step": 774, + "teacher_loss": 0.28305715322494507 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.5041630268096924, + "learning_rate": 3.361283793552118e-06, + "loss": 0.3727, + "step": 775, + "teacher_loss": 0.3580412268638611 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.5291510820388794, + "learning_rate": 3.3656209339308947e-06, + "loss": 0.3608, + "step": 776, + "teacher_loss": 0.34212982654571533 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.5057750940322876, + "learning_rate": 3.369958074309672e-06, + "loss": 0.3006, + "step": 777, + "teacher_loss": 0.2778407633304596 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.3278176784515381, + "learning_rate": 3.374295214688449e-06, + "loss": 0.3175, + "step": 778, + "teacher_loss": 0.3163798451423645 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.27269527316093445, + "learning_rate": 3.378632355067226e-06, + "loss": 0.2248, + "step": 779, + "teacher_loss": 0.21945229172706604 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.4998913109302521, + "learning_rate": 3.3829694954460027e-06, + "loss": 0.3142, + "step": 780, + "teacher_loss": 0.293518602848053 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.15515512228012085, + "learning_rate": 3.3873066358247795e-06, + "loss": 0.2447, + "step": 781, + "teacher_loss": 0.25470009446144104 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.6261863708496094, + "learning_rate": 3.3916437762035567e-06, + "loss": 0.257, + "step": 782, + "teacher_loss": 0.21595072746276855 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.46851101517677307, + "learning_rate": 3.3959809165823334e-06, + "loss": 0.1975, + "step": 783, + "teacher_loss": 0.16738788783550262 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.5671685934066772, + "learning_rate": 3.4003180569611102e-06, + "loss": 0.3325, + "step": 784, + "teacher_loss": 0.30642879009246826 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.3607160151004791, + "learning_rate": 3.404655197339887e-06, + "loss": 0.2779, + "step": 785, + "teacher_loss": 0.26870208978652954 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.2685242295265198, + "learning_rate": 3.408992337718664e-06, + "loss": 0.4174, + "step": 786, + "teacher_loss": 0.43390893936157227 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.5977778434753418, + "learning_rate": 3.413329478097441e-06, + "loss": 0.3765, + "step": 787, + "teacher_loss": 0.35194242000579834 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.49139028787612915, + "learning_rate": 3.417666618476218e-06, + "loss": 0.2706, + "step": 788, + "teacher_loss": 0.24603916704654694 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.3657628297805786, + "learning_rate": 3.4220037588549954e-06, + "loss": 0.2407, + "step": 789, + "teacher_loss": 0.22684475779533386 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.4599382281303406, + "learning_rate": 3.426340899233772e-06, + "loss": 0.3181, + "step": 790, + "teacher_loss": 0.3022891879081726 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.41153550148010254, + "learning_rate": 3.430678039612549e-06, + "loss": 0.2435, + "step": 791, + "teacher_loss": 0.2247842252254486 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.5343309640884399, + "learning_rate": 3.4350151799913257e-06, + "loss": 0.315, + "step": 792, + "teacher_loss": 0.2906323969364166 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.35651153326034546, + "learning_rate": 3.439352320370103e-06, + "loss": 0.2195, + "step": 793, + "teacher_loss": 0.20424365997314453 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.38014569878578186, + "learning_rate": 3.4436894607488797e-06, + "loss": 0.2973, + "step": 794, + "teacher_loss": 0.2881115972995758 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.5738981366157532, + "learning_rate": 3.4480266011276564e-06, + "loss": 0.2879, + "step": 795, + "teacher_loss": 0.25608059763908386 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.5806160569190979, + "learning_rate": 3.4523637415064332e-06, + "loss": 0.306, + "step": 796, + "teacher_loss": 0.2754618525505066 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.4372144937515259, + "learning_rate": 3.4567008818852104e-06, + "loss": 0.2724, + "step": 797, + "teacher_loss": 0.254102885723114 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.354651540517807, + "learning_rate": 3.461038022263987e-06, + "loss": 0.2546, + "step": 798, + "teacher_loss": 0.24346190690994263 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.6520552039146423, + "learning_rate": 3.4653751626427644e-06, + "loss": 0.2781, + "step": 799, + "teacher_loss": 0.23655402660369873 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.4892185628414154, + "learning_rate": 3.469712303021541e-06, + "loss": 0.3559, + "step": 800, + "teacher_loss": 0.34104424715042114 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 0.4080970287322998, + "learning_rate": 3.4740494434003184e-06, + "loss": 0.1973, + "step": 801, + "teacher_loss": 0.17385894060134888 + }, + { + "compression_loss": 0.0, + "epoch": 0.14, + "label_loss": 1.0153800249099731, + "learning_rate": 3.478386583779095e-06, + "loss": 0.3055, + "step": 802, + "teacher_loss": 0.22665008902549744 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.5426517724990845, + "learning_rate": 3.482723724157872e-06, + "loss": 0.283, + "step": 803, + "teacher_loss": 0.2541946470737457 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 1.054621934890747, + "learning_rate": 3.487060864536649e-06, + "loss": 0.313, + "step": 804, + "teacher_loss": 0.23054583370685577 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.4915931224822998, + "learning_rate": 3.491398004915426e-06, + "loss": 0.3753, + "step": 805, + "teacher_loss": 0.36235424876213074 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.661030650138855, + "learning_rate": 3.4957351452942027e-06, + "loss": 0.3455, + "step": 806, + "teacher_loss": 0.31044602394104004 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.23664268851280212, + "learning_rate": 3.5000722856729794e-06, + "loss": 0.2015, + "step": 807, + "teacher_loss": 0.19761332869529724 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.6139421463012695, + "learning_rate": 3.5044094260517566e-06, + "loss": 0.439, + "step": 808, + "teacher_loss": 0.4196016788482666 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.26901984214782715, + "learning_rate": 3.508746566430534e-06, + "loss": 0.2893, + "step": 809, + "teacher_loss": 0.2915344834327698 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.7388201951980591, + "learning_rate": 3.5130837068093106e-06, + "loss": 0.3154, + "step": 810, + "teacher_loss": 0.26829952001571655 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.4415381848812103, + "learning_rate": 3.5174208471880874e-06, + "loss": 0.2223, + "step": 811, + "teacher_loss": 0.19795460999011993 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.32499265670776367, + "learning_rate": 3.5217579875668646e-06, + "loss": 0.3234, + "step": 812, + "teacher_loss": 0.3232687711715698 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.2048925906419754, + "learning_rate": 3.5260951279456414e-06, + "loss": 0.1826, + "step": 813, + "teacher_loss": 0.18011921644210815 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.18259766697883606, + "learning_rate": 3.530432268324418e-06, + "loss": 0.2178, + "step": 814, + "teacher_loss": 0.22171109914779663 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.4891200661659241, + "learning_rate": 3.534769408703195e-06, + "loss": 0.3313, + "step": 815, + "teacher_loss": 0.3137642741203308 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.7843291759490967, + "learning_rate": 3.539106549081972e-06, + "loss": 0.2809, + "step": 816, + "teacher_loss": 0.22496187686920166 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.28351259231567383, + "learning_rate": 3.543443689460749e-06, + "loss": 0.1745, + "step": 817, + "teacher_loss": 0.1624288558959961 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.23178696632385254, + "learning_rate": 3.5477808298395257e-06, + "loss": 0.1979, + "step": 818, + "teacher_loss": 0.194082111120224 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.3313000202178955, + "learning_rate": 3.552117970218303e-06, + "loss": 0.2733, + "step": 819, + "teacher_loss": 0.26689624786376953 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.22444593906402588, + "learning_rate": 3.55645511059708e-06, + "loss": 0.2739, + "step": 820, + "teacher_loss": 0.27940744161605835 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.7567064166069031, + "learning_rate": 3.560792250975857e-06, + "loss": 0.2502, + "step": 821, + "teacher_loss": 0.19388672709465027 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.46008479595184326, + "learning_rate": 3.5651293913546336e-06, + "loss": 0.2726, + "step": 822, + "teacher_loss": 0.2518126964569092 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.30962055921554565, + "learning_rate": 3.569466531733411e-06, + "loss": 0.2582, + "step": 823, + "teacher_loss": 0.2525278329849243 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.20213857293128967, + "learning_rate": 3.5738036721121876e-06, + "loss": 0.1983, + "step": 824, + "teacher_loss": 0.19783489406108856 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.5922117233276367, + "learning_rate": 3.5781408124909643e-06, + "loss": 0.2429, + "step": 825, + "teacher_loss": 0.2040407508611679 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.4371291399002075, + "learning_rate": 3.582477952869741e-06, + "loss": 0.1662, + "step": 826, + "teacher_loss": 0.13614767789840698 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.35752737522125244, + "learning_rate": 3.5868150932485183e-06, + "loss": 0.2022, + "step": 827, + "teacher_loss": 0.18489113450050354 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.35045942664146423, + "learning_rate": 3.591152233627295e-06, + "loss": 0.2335, + "step": 828, + "teacher_loss": 0.2205122709274292 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.6536198258399963, + "learning_rate": 3.595489374006072e-06, + "loss": 0.334, + "step": 829, + "teacher_loss": 0.29846251010894775 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.4388755261898041, + "learning_rate": 3.5998265143848486e-06, + "loss": 0.4085, + "step": 830, + "teacher_loss": 0.40517014265060425 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.38399872183799744, + "learning_rate": 3.6041636547636263e-06, + "loss": 0.2304, + "step": 831, + "teacher_loss": 0.21332237124443054 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.3681285083293915, + "learning_rate": 3.608500795142403e-06, + "loss": 0.2265, + "step": 832, + "teacher_loss": 0.21076641976833344 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.3287276327610016, + "learning_rate": 3.61283793552118e-06, + "loss": 0.204, + "step": 833, + "teacher_loss": 0.19010718166828156 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.29119449853897095, + "learning_rate": 3.6171750758999566e-06, + "loss": 0.2318, + "step": 834, + "teacher_loss": 0.2252272218465805 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.32534223794937134, + "learning_rate": 3.621512216278734e-06, + "loss": 0.216, + "step": 835, + "teacher_loss": 0.20379836857318878 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.3137243986129761, + "learning_rate": 3.6258493566575106e-06, + "loss": 0.2288, + "step": 836, + "teacher_loss": 0.21933528780937195 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.4292237162590027, + "learning_rate": 3.6301864970362873e-06, + "loss": 0.2345, + "step": 837, + "teacher_loss": 0.2128177285194397 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.471017062664032, + "learning_rate": 3.6345236374150645e-06, + "loss": 0.2339, + "step": 838, + "teacher_loss": 0.20754003524780273 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.4699787199497223, + "learning_rate": 3.6388607777938413e-06, + "loss": 0.3017, + "step": 839, + "teacher_loss": 0.28298401832580566 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.4896871745586395, + "learning_rate": 3.643197918172618e-06, + "loss": 0.3399, + "step": 840, + "teacher_loss": 0.32330477237701416 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.5529024004936218, + "learning_rate": 3.647535058551395e-06, + "loss": 0.2489, + "step": 841, + "teacher_loss": 0.21517637372016907 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.6582509875297546, + "learning_rate": 3.6518721989301725e-06, + "loss": 0.3614, + "step": 842, + "teacher_loss": 0.32845115661621094 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.3299567401409149, + "learning_rate": 3.6562093393089493e-06, + "loss": 0.2812, + "step": 843, + "teacher_loss": 0.2757299244403839 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.48203617334365845, + "learning_rate": 3.660546479687726e-06, + "loss": 0.2247, + "step": 844, + "teacher_loss": 0.1960657238960266 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.16762351989746094, + "learning_rate": 3.664883620066503e-06, + "loss": 0.2199, + "step": 845, + "teacher_loss": 0.22566986083984375 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.22216686606407166, + "learning_rate": 3.66922076044528e-06, + "loss": 0.2187, + "step": 846, + "teacher_loss": 0.21833837032318115 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.13990309834480286, + "learning_rate": 3.6735579008240568e-06, + "loss": 0.1713, + "step": 847, + "teacher_loss": 0.1747407615184784 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.3876246213912964, + "learning_rate": 3.6778950412028336e-06, + "loss": 0.2496, + "step": 848, + "teacher_loss": 0.23422543704509735 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.5054432153701782, + "learning_rate": 3.6822321815816103e-06, + "loss": 0.2898, + "step": 849, + "teacher_loss": 0.26583167910575867 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.6304320096969604, + "learning_rate": 3.6865693219603875e-06, + "loss": 0.4114, + "step": 850, + "teacher_loss": 0.38704848289489746 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.5840238332748413, + "learning_rate": 3.6909064623391643e-06, + "loss": 0.2817, + "step": 851, + "teacher_loss": 0.24810011684894562 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.365255206823349, + "learning_rate": 3.695243602717941e-06, + "loss": 0.2353, + "step": 852, + "teacher_loss": 0.2208411544561386 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.4353649616241455, + "learning_rate": 3.6995807430967187e-06, + "loss": 0.2637, + "step": 853, + "teacher_loss": 0.244678795337677 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.36997756361961365, + "learning_rate": 3.7039178834754955e-06, + "loss": 0.2801, + "step": 854, + "teacher_loss": 0.2701045870780945 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.7335597276687622, + "learning_rate": 3.7082550238542723e-06, + "loss": 0.3561, + "step": 855, + "teacher_loss": 0.3141207993030548 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.41204988956451416, + "learning_rate": 3.712592164233049e-06, + "loss": 0.1982, + "step": 856, + "teacher_loss": 0.17448459565639496 + }, + { + "compression_loss": 0.0, + "epoch": 0.15, + "label_loss": 0.5471560955047607, + "learning_rate": 3.7169293046118262e-06, + "loss": 0.2551, + "step": 857, + "teacher_loss": 0.22263610363006592 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.37322646379470825, + "learning_rate": 3.721266444990603e-06, + "loss": 0.3297, + "step": 858, + "teacher_loss": 0.32486236095428467 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.32819563150405884, + "learning_rate": 3.7256035853693798e-06, + "loss": 0.2302, + "step": 859, + "teacher_loss": 0.21925684809684753 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.7161264419555664, + "learning_rate": 3.7299407257481566e-06, + "loss": 0.2541, + "step": 860, + "teacher_loss": 0.20278343558311462 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.20098334550857544, + "learning_rate": 3.7342778661269338e-06, + "loss": 0.2012, + "step": 861, + "teacher_loss": 0.20126613974571228 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.5543959140777588, + "learning_rate": 3.7386150065057105e-06, + "loss": 0.3197, + "step": 862, + "teacher_loss": 0.2936192452907562 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.7394939661026001, + "learning_rate": 3.7429521468844873e-06, + "loss": 0.2605, + "step": 863, + "teacher_loss": 0.20730775594711304 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.5929688811302185, + "learning_rate": 3.7472892872632645e-06, + "loss": 0.3508, + "step": 864, + "teacher_loss": 0.3238885998725891 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.4986763596534729, + "learning_rate": 3.7516264276420413e-06, + "loss": 0.2952, + "step": 865, + "teacher_loss": 0.2726404070854187 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.62636798620224, + "learning_rate": 3.7559635680208185e-06, + "loss": 0.2636, + "step": 866, + "teacher_loss": 0.22326195240020752 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.33991825580596924, + "learning_rate": 3.760300708399595e-06, + "loss": 0.2213, + "step": 867, + "teacher_loss": 0.2080913633108139 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.18533429503440857, + "learning_rate": 3.7646378487783725e-06, + "loss": 0.2037, + "step": 868, + "teacher_loss": 0.20570705831050873 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.32129088044166565, + "learning_rate": 3.7689749891571497e-06, + "loss": 0.2381, + "step": 869, + "teacher_loss": 0.2288992702960968 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.4956677556037903, + "learning_rate": 3.773312129535926e-06, + "loss": 0.202, + "step": 870, + "teacher_loss": 0.16936850547790527 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.16438832879066467, + "learning_rate": 3.777649269914703e-06, + "loss": 0.2944, + "step": 871, + "teacher_loss": 0.30880677700042725 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.2171880602836609, + "learning_rate": 3.78198641029348e-06, + "loss": 0.206, + "step": 872, + "teacher_loss": 0.2047433853149414 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.43466800451278687, + "learning_rate": 3.786323550672257e-06, + "loss": 0.3843, + "step": 873, + "teacher_loss": 0.37875521183013916 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.35497087240219116, + "learning_rate": 3.7906606910510335e-06, + "loss": 0.23, + "step": 874, + "teacher_loss": 0.21608425676822662 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.3560202717781067, + "learning_rate": 3.7949978314298107e-06, + "loss": 0.2734, + "step": 875, + "teacher_loss": 0.2641996741294861 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.38263845443725586, + "learning_rate": 3.7993349718085875e-06, + "loss": 0.2777, + "step": 876, + "teacher_loss": 0.26604828238487244 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.8546239137649536, + "learning_rate": 3.8036721121873647e-06, + "loss": 0.3397, + "step": 877, + "teacher_loss": 0.28249073028564453 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.643721342086792, + "learning_rate": 3.808009252566141e-06, + "loss": 0.4905, + "step": 878, + "teacher_loss": 0.47351616621017456 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.9456126689910889, + "learning_rate": 3.8123463929449182e-06, + "loss": 0.3847, + "step": 879, + "teacher_loss": 0.32232964038848877 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.3652101159095764, + "learning_rate": 3.816683533323696e-06, + "loss": 0.2347, + "step": 880, + "teacher_loss": 0.22023239731788635 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.42043590545654297, + "learning_rate": 3.821020673702472e-06, + "loss": 0.2241, + "step": 881, + "teacher_loss": 0.20229294896125793 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.7193540930747986, + "learning_rate": 3.825357814081249e-06, + "loss": 0.3352, + "step": 882, + "teacher_loss": 0.2925070524215698 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.32793840765953064, + "learning_rate": 3.829694954460026e-06, + "loss": 0.25, + "step": 883, + "teacher_loss": 0.24136817455291748 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.41589248180389404, + "learning_rate": 3.834032094838803e-06, + "loss": 0.3227, + "step": 884, + "teacher_loss": 0.31234976649284363 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.7969814538955688, + "learning_rate": 3.83836923521758e-06, + "loss": 0.4113, + "step": 885, + "teacher_loss": 0.3685019910335541 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.29240015149116516, + "learning_rate": 3.842706375596357e-06, + "loss": 0.1815, + "step": 886, + "teacher_loss": 0.16915994882583618 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.2281486988067627, + "learning_rate": 3.847043515975133e-06, + "loss": 0.2525, + "step": 887, + "teacher_loss": 0.2552439868450165 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.3892339766025543, + "learning_rate": 3.851380656353911e-06, + "loss": 0.2172, + "step": 888, + "teacher_loss": 0.1981118619441986 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.7245720624923706, + "learning_rate": 3.855717796732688e-06, + "loss": 0.3159, + "step": 889, + "teacher_loss": 0.2705182433128357 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.47479361295700073, + "learning_rate": 3.8600549371114645e-06, + "loss": 0.2649, + "step": 890, + "teacher_loss": 0.24153810739517212 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.6211075186729431, + "learning_rate": 3.864392077490242e-06, + "loss": 0.2258, + "step": 891, + "teacher_loss": 0.18184706568717957 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.2326255738735199, + "learning_rate": 3.868729217869018e-06, + "loss": 0.2311, + "step": 892, + "teacher_loss": 0.2309640347957611 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.48016250133514404, + "learning_rate": 3.873066358247796e-06, + "loss": 0.2291, + "step": 893, + "teacher_loss": 0.2012297809123993 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.6621298789978027, + "learning_rate": 3.877403498626572e-06, + "loss": 0.3372, + "step": 894, + "teacher_loss": 0.30114662647247314 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.5290963649749756, + "learning_rate": 3.881740639005349e-06, + "loss": 0.3426, + "step": 895, + "teacher_loss": 0.32189279794692993 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 1.1647858619689941, + "learning_rate": 3.886077779384126e-06, + "loss": 0.6473, + "step": 896, + "teacher_loss": 0.5897899270057678 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.25802403688430786, + "learning_rate": 3.890414919762904e-06, + "loss": 0.2678, + "step": 897, + "teacher_loss": 0.2688629627227783 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.6035110950469971, + "learning_rate": 3.8947520601416795e-06, + "loss": 0.303, + "step": 898, + "teacher_loss": 0.2696284353733063 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.2295677214860916, + "learning_rate": 3.899089200520457e-06, + "loss": 0.2082, + "step": 899, + "teacher_loss": 0.2058761715888977 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.12190845608711243, + "learning_rate": 3.903426340899234e-06, + "loss": 0.1636, + "step": 900, + "teacher_loss": 0.1681968867778778 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.3050462603569031, + "learning_rate": 3.907763481278011e-06, + "loss": 0.268, + "step": 901, + "teacher_loss": 0.26387637853622437 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.7868713140487671, + "learning_rate": 3.912100621656788e-06, + "loss": 0.4171, + "step": 902, + "teacher_loss": 0.37606674432754517 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.30534982681274414, + "learning_rate": 3.916437762035564e-06, + "loss": 0.2834, + "step": 903, + "teacher_loss": 0.28098100423812866 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.42476966977119446, + "learning_rate": 3.920774902414342e-06, + "loss": 0.27, + "step": 904, + "teacher_loss": 0.25281471014022827 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.5270523428916931, + "learning_rate": 3.925112042793119e-06, + "loss": 0.2844, + "step": 905, + "teacher_loss": 0.25741684436798096 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.7185977697372437, + "learning_rate": 3.929449183171895e-06, + "loss": 0.2836, + "step": 906, + "teacher_loss": 0.23525749146938324 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.36288803815841675, + "learning_rate": 3.933786323550672e-06, + "loss": 0.3229, + "step": 907, + "teacher_loss": 0.3184909224510193 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.2687336206436157, + "learning_rate": 3.93812346392945e-06, + "loss": 0.2188, + "step": 908, + "teacher_loss": 0.21328972280025482 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.41602492332458496, + "learning_rate": 3.942460604308226e-06, + "loss": 0.2366, + "step": 909, + "teacher_loss": 0.2166227549314499 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.6163256168365479, + "learning_rate": 3.946797744687003e-06, + "loss": 0.317, + "step": 910, + "teacher_loss": 0.2836914360523224 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.5708070993423462, + "learning_rate": 3.951134885065779e-06, + "loss": 0.2507, + "step": 911, + "teacher_loss": 0.21518346667289734 + }, + { + "compression_loss": 0.0, + "epoch": 0.16, + "label_loss": 0.4501948058605194, + "learning_rate": 3.955472025444557e-06, + "loss": 0.2577, + "step": 912, + "teacher_loss": 0.2363569736480713 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.3216378092765808, + "learning_rate": 3.9598091658233345e-06, + "loss": 0.2317, + "step": 913, + "teacher_loss": 0.22169330716133118 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.25488758087158203, + "learning_rate": 3.9641463062021105e-06, + "loss": 0.2633, + "step": 914, + "teacher_loss": 0.26423513889312744 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.4387619197368622, + "learning_rate": 3.968483446580888e-06, + "loss": 0.2419, + "step": 915, + "teacher_loss": 0.21997150778770447 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.5765150785446167, + "learning_rate": 3.972820586959665e-06, + "loss": 0.3512, + "step": 916, + "teacher_loss": 0.3261568546295166 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.27555862069129944, + "learning_rate": 3.977157727338442e-06, + "loss": 0.2252, + "step": 917, + "teacher_loss": 0.2195887714624405 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.4694218933582306, + "learning_rate": 3.981494867717218e-06, + "loss": 0.3226, + "step": 918, + "teacher_loss": 0.30624938011169434 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.3344815969467163, + "learning_rate": 3.985832008095996e-06, + "loss": 0.2241, + "step": 919, + "teacher_loss": 0.21181178092956543 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.348363995552063, + "learning_rate": 3.990169148474772e-06, + "loss": 0.1689, + "step": 920, + "teacher_loss": 0.14895710349082947 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.8943019509315491, + "learning_rate": 3.9945062888535496e-06, + "loss": 0.2893, + "step": 921, + "teacher_loss": 0.22203224897384644 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.4587143063545227, + "learning_rate": 3.9988434292323255e-06, + "loss": 0.2592, + "step": 922, + "teacher_loss": 0.23705679178237915 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.7997756600379944, + "learning_rate": 4.003180569611103e-06, + "loss": 0.2722, + "step": 923, + "teacher_loss": 0.21352726221084595 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.40306591987609863, + "learning_rate": 4.007517709989881e-06, + "loss": 0.2787, + "step": 924, + "teacher_loss": 0.26491859555244446 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.4491243362426758, + "learning_rate": 4.011854850368657e-06, + "loss": 0.3019, + "step": 925, + "teacher_loss": 0.2855908274650574 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.27633893489837646, + "learning_rate": 4.016191990747434e-06, + "loss": 0.3247, + "step": 926, + "teacher_loss": 0.33006787300109863 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.21397623419761658, + "learning_rate": 4.020529131126211e-06, + "loss": 0.2027, + "step": 927, + "teacher_loss": 0.2014533281326294 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.2860625088214874, + "learning_rate": 4.024866271504988e-06, + "loss": 0.1887, + "step": 928, + "teacher_loss": 0.17782723903656006 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.27117055654525757, + "learning_rate": 4.029203411883765e-06, + "loss": 0.378, + "step": 929, + "teacher_loss": 0.3898550271987915 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.68213951587677, + "learning_rate": 4.033540552262541e-06, + "loss": 0.3452, + "step": 930, + "teacher_loss": 0.3077096939086914 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.4420263171195984, + "learning_rate": 4.037877692641318e-06, + "loss": 0.2725, + "step": 931, + "teacher_loss": 0.25371670722961426 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.6770023107528687, + "learning_rate": 4.042214833020096e-06, + "loss": 0.2602, + "step": 932, + "teacher_loss": 0.21393685042858124 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.6111568212509155, + "learning_rate": 4.046551973398872e-06, + "loss": 0.3414, + "step": 933, + "teacher_loss": 0.3114704191684723 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.38260430097579956, + "learning_rate": 4.050889113777649e-06, + "loss": 0.3422, + "step": 934, + "teacher_loss": 0.3376636207103729 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.3411727845668793, + "learning_rate": 4.055226254156427e-06, + "loss": 0.2725, + "step": 935, + "teacher_loss": 0.2648276090621948 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.4407883286476135, + "learning_rate": 4.059563394535203e-06, + "loss": 0.2447, + "step": 936, + "teacher_loss": 0.22286444902420044 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.4329412579536438, + "learning_rate": 4.0639005349139805e-06, + "loss": 0.2539, + "step": 937, + "teacher_loss": 0.23398561775684357 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.3359575569629669, + "learning_rate": 4.068237675292757e-06, + "loss": 0.2118, + "step": 938, + "teacher_loss": 0.19796612858772278 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.2284258008003235, + "learning_rate": 4.072574815671534e-06, + "loss": 0.2204, + "step": 939, + "teacher_loss": 0.21945300698280334 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.3847157955169678, + "learning_rate": 4.076911956050311e-06, + "loss": 0.3722, + "step": 940, + "teacher_loss": 0.3708217740058899 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.4290144145488739, + "learning_rate": 4.081249096429088e-06, + "loss": 0.2433, + "step": 941, + "teacher_loss": 0.2226441204547882 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.3952246308326721, + "learning_rate": 4.085586236807864e-06, + "loss": 0.2295, + "step": 942, + "teacher_loss": 0.2111111581325531 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.38891148567199707, + "learning_rate": 4.089923377186642e-06, + "loss": 0.2309, + "step": 943, + "teacher_loss": 0.213353231549263 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.41349172592163086, + "learning_rate": 4.094260517565418e-06, + "loss": 0.1751, + "step": 944, + "teacher_loss": 0.14863823354244232 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.3463779091835022, + "learning_rate": 4.0985976579441956e-06, + "loss": 0.235, + "step": 945, + "teacher_loss": 0.22262157499790192 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.4911763072013855, + "learning_rate": 4.102934798322973e-06, + "loss": 0.2219, + "step": 946, + "teacher_loss": 0.19197635352611542 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.1033777967095375, + "learning_rate": 4.107271938701749e-06, + "loss": 0.1856, + "step": 947, + "teacher_loss": 0.19476903975009918 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.43637940287590027, + "learning_rate": 4.111609079080527e-06, + "loss": 0.2801, + "step": 948, + "teacher_loss": 0.26268404722213745 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.5513020157814026, + "learning_rate": 4.1159462194593035e-06, + "loss": 0.2252, + "step": 949, + "teacher_loss": 0.18901792168617249 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 1.0793309211730957, + "learning_rate": 4.12028335983808e-06, + "loss": 0.3689, + "step": 950, + "teacher_loss": 0.28991031646728516 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.2533317506313324, + "learning_rate": 4.124620500216857e-06, + "loss": 0.1982, + "step": 951, + "teacher_loss": 0.1921074092388153 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.7881702184677124, + "learning_rate": 4.128957640595634e-06, + "loss": 0.3531, + "step": 952, + "teacher_loss": 0.304771363735199 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.5719931125640869, + "learning_rate": 4.133294780974411e-06, + "loss": 0.2478, + "step": 953, + "teacher_loss": 0.2117629051208496 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.2875203490257263, + "learning_rate": 4.137631921353188e-06, + "loss": 0.2829, + "step": 954, + "teacher_loss": 0.2823842465877533 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.36733829975128174, + "learning_rate": 4.141969061731964e-06, + "loss": 0.3096, + "step": 955, + "teacher_loss": 0.30319488048553467 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.4917277693748474, + "learning_rate": 4.146306202110742e-06, + "loss": 0.2711, + "step": 956, + "teacher_loss": 0.2465406060218811 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.246895432472229, + "learning_rate": 4.150643342489519e-06, + "loss": 0.1765, + "step": 957, + "teacher_loss": 0.16867899894714355 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.47302472591400146, + "learning_rate": 4.154980482868295e-06, + "loss": 0.244, + "step": 958, + "teacher_loss": 0.21852673590183258 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.3434694707393646, + "learning_rate": 4.159317623247073e-06, + "loss": 0.2134, + "step": 959, + "teacher_loss": 0.1989946961402893 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.18457627296447754, + "learning_rate": 4.163654763625849e-06, + "loss": 0.285, + "step": 960, + "teacher_loss": 0.29612693190574646 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.5879428386688232, + "learning_rate": 4.1679919040046265e-06, + "loss": 0.3018, + "step": 961, + "teacher_loss": 0.2700411081314087 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.1663154661655426, + "learning_rate": 4.172329044383403e-06, + "loss": 0.3135, + "step": 962, + "teacher_loss": 0.32984256744384766 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.7105588912963867, + "learning_rate": 4.17666618476218e-06, + "loss": 0.2601, + "step": 963, + "teacher_loss": 0.2100663185119629 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.299935519695282, + "learning_rate": 4.181003325140957e-06, + "loss": 0.2596, + "step": 964, + "teacher_loss": 0.25516477227211 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.5807290077209473, + "learning_rate": 4.1853404655197345e-06, + "loss": 0.3152, + "step": 965, + "teacher_loss": 0.2857227921485901 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.3001972436904907, + "learning_rate": 4.18967760589851e-06, + "loss": 0.2347, + "step": 966, + "teacher_loss": 0.22739841043949127 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.30118033289909363, + "learning_rate": 4.194014746277288e-06, + "loss": 0.4572, + "step": 967, + "teacher_loss": 0.47455668449401855 + }, + { + "compression_loss": 0.0, + "epoch": 0.17, + "label_loss": 0.5492435097694397, + "learning_rate": 4.198351886656066e-06, + "loss": 0.2427, + "step": 968, + "teacher_loss": 0.20863686501979828 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.21092335879802704, + "learning_rate": 4.2026890270348416e-06, + "loss": 0.2932, + "step": 969, + "teacher_loss": 0.3023689091205597 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.5165823698043823, + "learning_rate": 4.207026167413619e-06, + "loss": 0.2737, + "step": 970, + "teacher_loss": 0.24669763445854187 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.46799975633621216, + "learning_rate": 4.211363307792395e-06, + "loss": 0.2835, + "step": 971, + "teacher_loss": 0.2630111873149872 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.29865220189094543, + "learning_rate": 4.215700448171173e-06, + "loss": 0.305, + "step": 972, + "teacher_loss": 0.30571699142456055 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.5653126835823059, + "learning_rate": 4.2200375885499495e-06, + "loss": 0.3436, + "step": 973, + "teacher_loss": 0.3189849853515625 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.21941082179546356, + "learning_rate": 4.224374728928726e-06, + "loss": 0.2397, + "step": 974, + "teacher_loss": 0.2419101595878601 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.47118890285491943, + "learning_rate": 4.228711869307503e-06, + "loss": 0.253, + "step": 975, + "teacher_loss": 0.22870707511901855 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.3474152982234955, + "learning_rate": 4.233049009686281e-06, + "loss": 0.1994, + "step": 976, + "teacher_loss": 0.18298810720443726 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.36741209030151367, + "learning_rate": 4.237386150065057e-06, + "loss": 0.3737, + "step": 977, + "teacher_loss": 0.3743492364883423 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.5597560405731201, + "learning_rate": 4.241723290443834e-06, + "loss": 0.2816, + "step": 978, + "teacher_loss": 0.25066542625427246 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.525507390499115, + "learning_rate": 4.246060430822611e-06, + "loss": 0.2572, + "step": 979, + "teacher_loss": 0.22739559412002563 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.6766448020935059, + "learning_rate": 4.250397571201388e-06, + "loss": 0.2679, + "step": 980, + "teacher_loss": 0.2225266993045807 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.28461331129074097, + "learning_rate": 4.254734711580165e-06, + "loss": 0.2248, + "step": 981, + "teacher_loss": 0.21820171177387238 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.9696887731552124, + "learning_rate": 4.259071851958941e-06, + "loss": 0.3294, + "step": 982, + "teacher_loss": 0.25827687978744507 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.17163234949111938, + "learning_rate": 4.263408992337719e-06, + "loss": 0.233, + "step": 983, + "teacher_loss": 0.23976437747478485 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.7183161973953247, + "learning_rate": 4.267746132716496e-06, + "loss": 0.2647, + "step": 984, + "teacher_loss": 0.2143322080373764 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.2817261219024658, + "learning_rate": 4.2720832730952725e-06, + "loss": 0.2704, + "step": 985, + "teacher_loss": 0.26910319924354553 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.6363096237182617, + "learning_rate": 4.276420413474049e-06, + "loss": 0.2522, + "step": 986, + "teacher_loss": 0.2095108926296234 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.24661116302013397, + "learning_rate": 4.280757553852827e-06, + "loss": 0.1652, + "step": 987, + "teacher_loss": 0.15610413253307343 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.5053436160087585, + "learning_rate": 4.285094694231603e-06, + "loss": 0.3204, + "step": 988, + "teacher_loss": 0.2998276948928833 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.3370142877101898, + "learning_rate": 4.2894318346103804e-06, + "loss": 0.2765, + "step": 989, + "teacher_loss": 0.26980364322662354 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.377845823764801, + "learning_rate": 4.293768974989157e-06, + "loss": 0.2562, + "step": 990, + "teacher_loss": 0.24268172681331635 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.9990074038505554, + "learning_rate": 4.298106115367934e-06, + "loss": 0.4299, + "step": 991, + "teacher_loss": 0.36664068698883057 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.7937301993370056, + "learning_rate": 4.302443255746712e-06, + "loss": 0.3371, + "step": 992, + "teacher_loss": 0.28632599115371704 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.517193078994751, + "learning_rate": 4.3067803961254875e-06, + "loss": 0.2072, + "step": 993, + "teacher_loss": 0.17272207140922546 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.5028003454208374, + "learning_rate": 4.311117536504265e-06, + "loss": 0.2493, + "step": 994, + "teacher_loss": 0.22113189101219177 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.15499143302440643, + "learning_rate": 4.315454676883042e-06, + "loss": 0.1958, + "step": 995, + "teacher_loss": 0.20028001070022583 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.39688703417778015, + "learning_rate": 4.319791817261819e-06, + "loss": 0.3163, + "step": 996, + "teacher_loss": 0.3073402941226959 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.30999478697776794, + "learning_rate": 4.3241289576405955e-06, + "loss": 0.199, + "step": 997, + "teacher_loss": 0.1867075115442276 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.6614822149276733, + "learning_rate": 4.328466098019373e-06, + "loss": 0.3184, + "step": 998, + "teacher_loss": 0.28031837940216064 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.4407710134983063, + "learning_rate": 4.332803238398149e-06, + "loss": 0.3742, + "step": 999, + "teacher_loss": 0.3667859435081482 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.30677372217178345, + "learning_rate": 4.337140378776927e-06, + "loss": 0.1966, + "step": 1000, + "teacher_loss": 0.18439361453056335 + }, + { + "epoch": 0.18, + "eval_exact_match": 79.78240302743615, + "eval_f1": 87.28467191236393, + "step": 1000 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.41976457834243774, + "learning_rate": 4.3414775191557034e-06, + "loss": 0.2564, + "step": 1001, + "teacher_loss": 0.23828575015068054 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.2679102122783661, + "learning_rate": 4.34581465953448e-06, + "loss": 0.3375, + "step": 1002, + "teacher_loss": 0.3452507257461548 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.3169446587562561, + "learning_rate": 4.350151799913258e-06, + "loss": 0.1705, + "step": 1003, + "teacher_loss": 0.15423138439655304 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.6178823113441467, + "learning_rate": 4.354488940292034e-06, + "loss": 0.266, + "step": 1004, + "teacher_loss": 0.22686412930488586 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.7176684737205505, + "learning_rate": 4.358826080670811e-06, + "loss": 0.3019, + "step": 1005, + "teacher_loss": 0.255659282207489 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.4044558107852936, + "learning_rate": 4.363163221049588e-06, + "loss": 0.2802, + "step": 1006, + "teacher_loss": 0.2664405405521393 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.6271200180053711, + "learning_rate": 4.367500361428365e-06, + "loss": 0.3949, + "step": 1007, + "teacher_loss": 0.3691057562828064 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.3779192566871643, + "learning_rate": 4.371837501807142e-06, + "loss": 0.2336, + "step": 1008, + "teacher_loss": 0.2175370156764984 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.5824831128120422, + "learning_rate": 4.3761746421859185e-06, + "loss": 0.2541, + "step": 1009, + "teacher_loss": 0.21766166388988495 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 1.0577467679977417, + "learning_rate": 4.380511782564696e-06, + "loss": 0.3168, + "step": 1010, + "teacher_loss": 0.2344709038734436 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.26988738775253296, + "learning_rate": 4.384848922943473e-06, + "loss": 0.4745, + "step": 1011, + "teacher_loss": 0.497225284576416 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.46977198123931885, + "learning_rate": 4.38918606332225e-06, + "loss": 0.2836, + "step": 1012, + "teacher_loss": 0.2628623843193054 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.3277967572212219, + "learning_rate": 4.3935232037010264e-06, + "loss": 0.2179, + "step": 1013, + "teacher_loss": 0.20572246611118317 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.20961245894432068, + "learning_rate": 4.397860344079804e-06, + "loss": 0.1465, + "step": 1014, + "teacher_loss": 0.13948100805282593 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.5755942463874817, + "learning_rate": 4.40219748445858e-06, + "loss": 0.5469, + "step": 1015, + "teacher_loss": 0.5436673760414124 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.5191944241523743, + "learning_rate": 4.406534624837358e-06, + "loss": 0.2942, + "step": 1016, + "teacher_loss": 0.26924625039100647 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.7318869829177856, + "learning_rate": 4.410871765216134e-06, + "loss": 0.2502, + "step": 1017, + "teacher_loss": 0.19667673110961914 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.43240630626678467, + "learning_rate": 4.415208905594911e-06, + "loss": 0.2781, + "step": 1018, + "teacher_loss": 0.26097458600997925 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.4053493142127991, + "learning_rate": 4.419546045973688e-06, + "loss": 0.2275, + "step": 1019, + "teacher_loss": 0.20779326558113098 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.5808550119400024, + "learning_rate": 4.423883186352465e-06, + "loss": 0.2726, + "step": 1020, + "teacher_loss": 0.23830586671829224 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.7242401838302612, + "learning_rate": 4.428220326731242e-06, + "loss": 0.378, + "step": 1021, + "teacher_loss": 0.3394874334335327 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.5987971425056458, + "learning_rate": 4.432557467110019e-06, + "loss": 0.3065, + "step": 1022, + "teacher_loss": 0.27407407760620117 + }, + { + "compression_loss": 0.0, + "epoch": 0.18, + "label_loss": 0.3604764938354492, + "learning_rate": 4.436894607488796e-06, + "loss": 0.2761, + "step": 1023, + "teacher_loss": 0.2667540907859802 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.42976105213165283, + "learning_rate": 4.441231747867573e-06, + "loss": 0.2526, + "step": 1024, + "teacher_loss": 0.232865571975708 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.16539642214775085, + "learning_rate": 4.44556888824635e-06, + "loss": 0.2126, + "step": 1025, + "teacher_loss": 0.21789035201072693 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.5467285513877869, + "learning_rate": 4.449906028625126e-06, + "loss": 0.3692, + "step": 1026, + "teacher_loss": 0.34948933124542236 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.3324883282184601, + "learning_rate": 4.454243169003904e-06, + "loss": 0.2313, + "step": 1027, + "teacher_loss": 0.22007356584072113 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.7447936534881592, + "learning_rate": 4.458580309382681e-06, + "loss": 0.2757, + "step": 1028, + "teacher_loss": 0.22361455857753754 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.4618903398513794, + "learning_rate": 4.462917449761457e-06, + "loss": 0.3314, + "step": 1029, + "teacher_loss": 0.31691259145736694 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.15198522806167603, + "learning_rate": 4.467254590140234e-06, + "loss": 0.3453, + "step": 1030, + "teacher_loss": 0.36675846576690674 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.2747849225997925, + "learning_rate": 4.471591730519011e-06, + "loss": 0.199, + "step": 1031, + "teacher_loss": 0.1905422806739807 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.30247190594673157, + "learning_rate": 4.4759288708977885e-06, + "loss": 0.2477, + "step": 1032, + "teacher_loss": 0.24163810908794403 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.3867129683494568, + "learning_rate": 4.480266011276565e-06, + "loss": 0.2692, + "step": 1033, + "teacher_loss": 0.25610053539276123 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.40048548579216003, + "learning_rate": 4.484603151655342e-06, + "loss": 0.2814, + "step": 1034, + "teacher_loss": 0.26817089319229126 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.2737944722175598, + "learning_rate": 4.488940292034119e-06, + "loss": 0.2076, + "step": 1035, + "teacher_loss": 0.20022031664848328 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.9053131937980652, + "learning_rate": 4.4932774324128965e-06, + "loss": 0.3598, + "step": 1036, + "teacher_loss": 0.29918426275253296 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.2241610884666443, + "learning_rate": 4.497614572791672e-06, + "loss": 0.1736, + "step": 1037, + "teacher_loss": 0.16797776520252228 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.3201694190502167, + "learning_rate": 4.50195171317045e-06, + "loss": 0.2327, + "step": 1038, + "teacher_loss": 0.22295695543289185 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 1.0477876663208008, + "learning_rate": 4.506288853549226e-06, + "loss": 0.3322, + "step": 1039, + "teacher_loss": 0.2526901662349701 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.9910594820976257, + "learning_rate": 4.510625993928004e-06, + "loss": 0.2853, + "step": 1040, + "teacher_loss": 0.20692741870880127 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.8933700323104858, + "learning_rate": 4.51496313430678e-06, + "loss": 0.419, + "step": 1041, + "teacher_loss": 0.3662818670272827 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.571254312992096, + "learning_rate": 4.519300274685557e-06, + "loss": 0.2237, + "step": 1042, + "teacher_loss": 0.1850433051586151 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.33391740918159485, + "learning_rate": 4.523637415064335e-06, + "loss": 0.2283, + "step": 1043, + "teacher_loss": 0.2165614366531372 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.4566960036754608, + "learning_rate": 4.5279745554431115e-06, + "loss": 0.3055, + "step": 1044, + "teacher_loss": 0.2886720299720764 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.13232797384262085, + "learning_rate": 4.532311695821888e-06, + "loss": 0.2133, + "step": 1045, + "teacher_loss": 0.22229985892772675 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.4978542923927307, + "learning_rate": 4.536648836200665e-06, + "loss": 0.4331, + "step": 1046, + "teacher_loss": 0.425857275724411 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.43347957730293274, + "learning_rate": 4.540985976579443e-06, + "loss": 0.2149, + "step": 1047, + "teacher_loss": 0.1905868649482727 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.4231417775154114, + "learning_rate": 4.545323116958219e-06, + "loss": 0.26, + "step": 1048, + "teacher_loss": 0.24186962842941284 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.22906342148780823, + "learning_rate": 4.549660257336996e-06, + "loss": 0.244, + "step": 1049, + "teacher_loss": 0.24570153653621674 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.33008861541748047, + "learning_rate": 4.553997397715772e-06, + "loss": 0.1647, + "step": 1050, + "teacher_loss": 0.1463623195886612 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.8421276807785034, + "learning_rate": 4.55833453809455e-06, + "loss": 0.2869, + "step": 1051, + "teacher_loss": 0.22520504891872406 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.3828859329223633, + "learning_rate": 4.562671678473327e-06, + "loss": 0.1979, + "step": 1052, + "teacher_loss": 0.17732752859592438 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.3831285238265991, + "learning_rate": 4.567008818852103e-06, + "loss": 0.2119, + "step": 1053, + "teacher_loss": 0.19282452762126923 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.9916056394577026, + "learning_rate": 4.571345959230881e-06, + "loss": 0.3011, + "step": 1054, + "teacher_loss": 0.22434478998184204 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.2853778600692749, + "learning_rate": 4.575683099609658e-06, + "loss": 0.2319, + "step": 1055, + "teacher_loss": 0.22595274448394775 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.7721850275993347, + "learning_rate": 4.5800202399884345e-06, + "loss": 0.2815, + "step": 1056, + "teacher_loss": 0.22701841592788696 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.6067239046096802, + "learning_rate": 4.584357380367211e-06, + "loss": 0.3036, + "step": 1057, + "teacher_loss": 0.26994168758392334 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.20231294631958008, + "learning_rate": 4.588694520745988e-06, + "loss": 0.2927, + "step": 1058, + "teacher_loss": 0.30278515815734863 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.3941829204559326, + "learning_rate": 4.593031661124765e-06, + "loss": 0.2034, + "step": 1059, + "teacher_loss": 0.18223124742507935 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.5052745938301086, + "learning_rate": 4.5973688015035425e-06, + "loss": 0.2361, + "step": 1060, + "teacher_loss": 0.20615960657596588 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.7895598411560059, + "learning_rate": 4.601705941882318e-06, + "loss": 0.6353, + "step": 1061, + "teacher_loss": 0.6181142330169678 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.4553050994873047, + "learning_rate": 4.606043082261096e-06, + "loss": 0.2421, + "step": 1062, + "teacher_loss": 0.21843793988227844 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.32229432463645935, + "learning_rate": 4.610380222639873e-06, + "loss": 0.1757, + "step": 1063, + "teacher_loss": 0.15940703451633453 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.31394755840301514, + "learning_rate": 4.61471736301865e-06, + "loss": 0.2284, + "step": 1064, + "teacher_loss": 0.2188643217086792 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.34707412123680115, + "learning_rate": 4.619054503397427e-06, + "loss": 0.2911, + "step": 1065, + "teacher_loss": 0.28493136167526245 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.9305747747421265, + "learning_rate": 4.623391643776204e-06, + "loss": 0.2861, + "step": 1066, + "teacher_loss": 0.21446290612220764 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.32297974824905396, + "learning_rate": 4.627728784154981e-06, + "loss": 0.3485, + "step": 1067, + "teacher_loss": 0.3513873517513275 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.26276522874832153, + "learning_rate": 4.6320659245337575e-06, + "loss": 0.2225, + "step": 1068, + "teacher_loss": 0.21806611120700836 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.5694539546966553, + "learning_rate": 4.636403064912534e-06, + "loss": 0.2047, + "step": 1069, + "teacher_loss": 0.16413669288158417 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.14550527930259705, + "learning_rate": 4.640740205291311e-06, + "loss": 0.2139, + "step": 1070, + "teacher_loss": 0.22152139246463776 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.3342948257923126, + "learning_rate": 4.645077345670089e-06, + "loss": 0.1953, + "step": 1071, + "teacher_loss": 0.17984285950660706 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.46232205629348755, + "learning_rate": 4.649414486048865e-06, + "loss": 0.2226, + "step": 1072, + "teacher_loss": 0.19598373770713806 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.5358340740203857, + "learning_rate": 4.653751626427642e-06, + "loss": 0.2848, + "step": 1073, + "teacher_loss": 0.2568589150905609 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.28656986355781555, + "learning_rate": 4.658088766806419e-06, + "loss": 0.2304, + "step": 1074, + "teacher_loss": 0.2241937220096588 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.9465980529785156, + "learning_rate": 4.662425907185196e-06, + "loss": 0.2695, + "step": 1075, + "teacher_loss": 0.1942925751209259 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.1416025459766388, + "learning_rate": 4.6667630475639734e-06, + "loss": 0.1563, + "step": 1076, + "teacher_loss": 0.15792804956436157 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.901451587677002, + "learning_rate": 4.67110018794275e-06, + "loss": 0.2931, + "step": 1077, + "teacher_loss": 0.22555328905582428 + }, + { + "compression_loss": 0.0, + "epoch": 0.19, + "label_loss": 0.20931003987789154, + "learning_rate": 4.675437328321527e-06, + "loss": 0.1872, + "step": 1078, + "teacher_loss": 0.18474730849266052 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.5381642580032349, + "learning_rate": 4.679774468700304e-06, + "loss": 0.2182, + "step": 1079, + "teacher_loss": 0.18265162408351898 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.47066718339920044, + "learning_rate": 4.6841116090790805e-06, + "loss": 0.2324, + "step": 1080, + "teacher_loss": 0.20595771074295044 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.36368829011917114, + "learning_rate": 4.688448749457857e-06, + "loss": 0.2713, + "step": 1081, + "teacher_loss": 0.26098522543907166 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.4694782495498657, + "learning_rate": 4.692785889836635e-06, + "loss": 0.3416, + "step": 1082, + "teacher_loss": 0.32733774185180664 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.7196818590164185, + "learning_rate": 4.697123030215411e-06, + "loss": 0.3201, + "step": 1083, + "teacher_loss": 0.27571550011634827 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.6905295848846436, + "learning_rate": 4.7014601705941885e-06, + "loss": 0.2846, + "step": 1084, + "teacher_loss": 0.23951539397239685 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.1392497569322586, + "learning_rate": 4.705797310972965e-06, + "loss": 0.1434, + "step": 1085, + "teacher_loss": 0.14389224350452423 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.5574511885643005, + "learning_rate": 4.710134451351742e-06, + "loss": 0.3486, + "step": 1086, + "teacher_loss": 0.3254011571407318 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.38188010454177856, + "learning_rate": 4.71447159173052e-06, + "loss": 0.2628, + "step": 1087, + "teacher_loss": 0.24957598745822906 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.31982582807540894, + "learning_rate": 4.7188087321092956e-06, + "loss": 0.2104, + "step": 1088, + "teacher_loss": 0.1982215791940689 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.21952366828918457, + "learning_rate": 4.723145872488073e-06, + "loss": 0.2193, + "step": 1089, + "teacher_loss": 0.21923795342445374 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.47833967208862305, + "learning_rate": 4.72748301286685e-06, + "loss": 0.3999, + "step": 1090, + "teacher_loss": 0.3912278413772583 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.07976051419973373, + "learning_rate": 4.731820153245627e-06, + "loss": 0.1795, + "step": 1091, + "teacher_loss": 0.19058480858802795 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.27420559525489807, + "learning_rate": 4.7361572936244035e-06, + "loss": 0.1994, + "step": 1092, + "teacher_loss": 0.19112759828567505 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 1.1061021089553833, + "learning_rate": 4.740494434003181e-06, + "loss": 0.4018, + "step": 1093, + "teacher_loss": 0.32357919216156006 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.28886157274246216, + "learning_rate": 4.744831574381957e-06, + "loss": 0.2188, + "step": 1094, + "teacher_loss": 0.21104061603546143 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.3894529938697815, + "learning_rate": 4.749168714760735e-06, + "loss": 0.2085, + "step": 1095, + "teacher_loss": 0.18841558694839478 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.5072648525238037, + "learning_rate": 4.7535058551395115e-06, + "loss": 0.223, + "step": 1096, + "teacher_loss": 0.19137945771217346 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.5903101563453674, + "learning_rate": 4.757842995518288e-06, + "loss": 0.2683, + "step": 1097, + "teacher_loss": 0.23246756196022034 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.28118568658828735, + "learning_rate": 4.762180135897066e-06, + "loss": 0.2612, + "step": 1098, + "teacher_loss": 0.2589266896247864 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.3604264259338379, + "learning_rate": 4.766517276275842e-06, + "loss": 0.2536, + "step": 1099, + "teacher_loss": 0.24169890582561493 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.3828474283218384, + "learning_rate": 4.770854416654619e-06, + "loss": 0.2326, + "step": 1100, + "teacher_loss": 0.21589769423007965 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.2525019645690918, + "learning_rate": 4.775191557033396e-06, + "loss": 0.2747, + "step": 1101, + "teacher_loss": 0.2771334648132324 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.5346746444702148, + "learning_rate": 4.779528697412173e-06, + "loss": 0.3208, + "step": 1102, + "teacher_loss": 0.29699212312698364 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.4957199692726135, + "learning_rate": 4.78386583779095e-06, + "loss": 0.2465, + "step": 1103, + "teacher_loss": 0.2187555432319641 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.47427546977996826, + "learning_rate": 4.788202978169727e-06, + "loss": 0.3496, + "step": 1104, + "teacher_loss": 0.3356937766075134 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.3891294002532959, + "learning_rate": 4.792540118548503e-06, + "loss": 0.3032, + "step": 1105, + "teacher_loss": 0.2936500906944275 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.28724467754364014, + "learning_rate": 4.796877258927281e-06, + "loss": 0.2505, + "step": 1106, + "teacher_loss": 0.24637135863304138 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.31420186161994934, + "learning_rate": 4.801214399306058e-06, + "loss": 0.3102, + "step": 1107, + "teacher_loss": 0.309751033782959 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.3099789023399353, + "learning_rate": 4.8055515396848345e-06, + "loss": 0.2141, + "step": 1108, + "teacher_loss": 0.20348715782165527 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.2552195191383362, + "learning_rate": 4.809888680063612e-06, + "loss": 0.1964, + "step": 1109, + "teacher_loss": 0.18982470035552979 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.3421964645385742, + "learning_rate": 4.814225820442388e-06, + "loss": 0.2672, + "step": 1110, + "teacher_loss": 0.25882214307785034 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.4639921188354492, + "learning_rate": 4.818562960821166e-06, + "loss": 0.2925, + "step": 1111, + "teacher_loss": 0.27346134185791016 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.2397266924381256, + "learning_rate": 4.822900101199942e-06, + "loss": 0.2084, + "step": 1112, + "teacher_loss": 0.20486527681350708 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.5527971386909485, + "learning_rate": 4.827237241578719e-06, + "loss": 0.3695, + "step": 1113, + "teacher_loss": 0.3491598069667816 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 1.2085955142974854, + "learning_rate": 4.831574381957496e-06, + "loss": 0.5834, + "step": 1114, + "teacher_loss": 0.513923704624176 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.8424012660980225, + "learning_rate": 4.835911522336274e-06, + "loss": 0.3221, + "step": 1115, + "teacher_loss": 0.26428303122520447 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.6601627469062805, + "learning_rate": 4.8402486627150495e-06, + "loss": 0.3147, + "step": 1116, + "teacher_loss": 0.2762717604637146 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.7519962191581726, + "learning_rate": 4.844585803093827e-06, + "loss": 0.4587, + "step": 1117, + "teacher_loss": 0.4260726869106293 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.4877069592475891, + "learning_rate": 4.848922943472603e-06, + "loss": 0.276, + "step": 1118, + "teacher_loss": 0.25252410769462585 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.468178927898407, + "learning_rate": 4.853260083851381e-06, + "loss": 0.2282, + "step": 1119, + "teacher_loss": 0.2015511691570282 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.5334605574607849, + "learning_rate": 4.857597224230158e-06, + "loss": 0.2762, + "step": 1120, + "teacher_loss": 0.2476247102022171 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.5600509643554688, + "learning_rate": 4.861934364608934e-06, + "loss": 0.298, + "step": 1121, + "teacher_loss": 0.2689228355884552 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.4051320552825928, + "learning_rate": 4.866271504987712e-06, + "loss": 0.2891, + "step": 1122, + "teacher_loss": 0.27618369460105896 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.4193379282951355, + "learning_rate": 4.870608645366489e-06, + "loss": 0.3517, + "step": 1123, + "teacher_loss": 0.3441656529903412 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.45917046070098877, + "learning_rate": 4.874945785745265e-06, + "loss": 0.4287, + "step": 1124, + "teacher_loss": 0.4253392517566681 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.619087815284729, + "learning_rate": 4.879282926124042e-06, + "loss": 0.3703, + "step": 1125, + "teacher_loss": 0.34262222051620483 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.29534435272216797, + "learning_rate": 4.88362006650282e-06, + "loss": 0.2016, + "step": 1126, + "teacher_loss": 0.1911729872226715 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.44164717197418213, + "learning_rate": 4.887957206881596e-06, + "loss": 0.1802, + "step": 1127, + "teacher_loss": 0.15113690495491028 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.24396923184394836, + "learning_rate": 4.892294347260373e-06, + "loss": 0.2387, + "step": 1128, + "teacher_loss": 0.23813480138778687 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.3436773121356964, + "learning_rate": 4.896631487639149e-06, + "loss": 0.2825, + "step": 1129, + "teacher_loss": 0.2757401466369629 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.401786208152771, + "learning_rate": 4.900968628017927e-06, + "loss": 0.249, + "step": 1130, + "teacher_loss": 0.23198971152305603 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.666278600692749, + "learning_rate": 4.9053057683967045e-06, + "loss": 0.2255, + "step": 1131, + "teacher_loss": 0.1764870584011078 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 1.011008381843567, + "learning_rate": 4.9096429087754805e-06, + "loss": 0.321, + "step": 1132, + "teacher_loss": 0.24438059329986572 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.9981403350830078, + "learning_rate": 4.913980049154258e-06, + "loss": 0.328, + "step": 1133, + "teacher_loss": 0.2534977197647095 + }, + { + "compression_loss": 0.0, + "epoch": 0.2, + "label_loss": 0.582736074924469, + "learning_rate": 4.918317189533035e-06, + "loss": 0.3337, + "step": 1134, + "teacher_loss": 0.306058406829834 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.8159241676330566, + "learning_rate": 4.922654329911812e-06, + "loss": 0.3222, + "step": 1135, + "teacher_loss": 0.26738640666007996 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.5094940662384033, + "learning_rate": 4.926991470290588e-06, + "loss": 0.4148, + "step": 1136, + "teacher_loss": 0.40424269437789917 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.11156779527664185, + "learning_rate": 4.931328610669365e-06, + "loss": 0.1679, + "step": 1137, + "teacher_loss": 0.17410853505134583 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.9740431308746338, + "learning_rate": 4.935665751048142e-06, + "loss": 0.2645, + "step": 1138, + "teacher_loss": 0.18562020361423492 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.23955108225345612, + "learning_rate": 4.9400028914269196e-06, + "loss": 0.1559, + "step": 1139, + "teacher_loss": 0.14657802879810333 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.6785324811935425, + "learning_rate": 4.9443400318056955e-06, + "loss": 0.3267, + "step": 1140, + "teacher_loss": 0.2876512110233307 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.6967428922653198, + "learning_rate": 4.948677172184473e-06, + "loss": 0.314, + "step": 1141, + "teacher_loss": 0.2714667320251465 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.3970792293548584, + "learning_rate": 4.953014312563251e-06, + "loss": 0.2588, + "step": 1142, + "teacher_loss": 0.24341876804828644 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.6833770275115967, + "learning_rate": 4.957351452942027e-06, + "loss": 0.3242, + "step": 1143, + "teacher_loss": 0.28425711393356323 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.37923964858055115, + "learning_rate": 4.961688593320804e-06, + "loss": 0.2005, + "step": 1144, + "teacher_loss": 0.1806732714176178 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.226164311170578, + "learning_rate": 4.966025733699581e-06, + "loss": 0.2611, + "step": 1145, + "teacher_loss": 0.2649730443954468 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.31378278136253357, + "learning_rate": 4.970362874078358e-06, + "loss": 0.1946, + "step": 1146, + "teacher_loss": 0.1813521385192871 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.8112713098526001, + "learning_rate": 4.974700014457135e-06, + "loss": 0.2909, + "step": 1147, + "teacher_loss": 0.2330630123615265 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.4197642207145691, + "learning_rate": 4.979037154835911e-06, + "loss": 0.2076, + "step": 1148, + "teacher_loss": 0.18404710292816162 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.44166573882102966, + "learning_rate": 4.983374295214688e-06, + "loss": 0.2258, + "step": 1149, + "teacher_loss": 0.20184940099716187 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.27114400267601013, + "learning_rate": 4.987711435593466e-06, + "loss": 0.2065, + "step": 1150, + "teacher_loss": 0.19927412271499634 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.655838131904602, + "learning_rate": 4.992048575972242e-06, + "loss": 0.3295, + "step": 1151, + "teacher_loss": 0.2932407855987549 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.5922248363494873, + "learning_rate": 4.996385716351019e-06, + "loss": 0.3031, + "step": 1152, + "teacher_loss": 0.270932137966156 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.3755096197128296, + "learning_rate": 5.000722856729797e-06, + "loss": 0.1986, + "step": 1153, + "teacher_loss": 0.17897866666316986 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.4813052713871002, + "learning_rate": 5.005059997108573e-06, + "loss": 0.1804, + "step": 1154, + "teacher_loss": 0.14693206548690796 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.6488795280456543, + "learning_rate": 5.0093971374873505e-06, + "loss": 0.2595, + "step": 1155, + "teacher_loss": 0.21618717908859253 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.2763577699661255, + "learning_rate": 5.013734277866127e-06, + "loss": 0.228, + "step": 1156, + "teacher_loss": 0.2226552665233612 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.3682769536972046, + "learning_rate": 5.018071418244904e-06, + "loss": 0.3396, + "step": 1157, + "teacher_loss": 0.33641955256462097 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.5189266204833984, + "learning_rate": 5.022408558623681e-06, + "loss": 0.3612, + "step": 1158, + "teacher_loss": 0.3437250256538391 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.2635785937309265, + "learning_rate": 5.026745699002458e-06, + "loss": 0.2292, + "step": 1159, + "teacher_loss": 0.22533442080020905 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.48142436146736145, + "learning_rate": 5.031082839381234e-06, + "loss": 0.2287, + "step": 1160, + "teacher_loss": 0.2006313055753708 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.5742180347442627, + "learning_rate": 5.035419979760012e-06, + "loss": 0.2903, + "step": 1161, + "teacher_loss": 0.25876420736312866 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.47835981845855713, + "learning_rate": 5.039757120138789e-06, + "loss": 0.314, + "step": 1162, + "teacher_loss": 0.2957807779312134 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.32022857666015625, + "learning_rate": 5.0440942605175656e-06, + "loss": 0.264, + "step": 1163, + "teacher_loss": 0.25777360796928406 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.4389013648033142, + "learning_rate": 5.048431400896343e-06, + "loss": 0.2758, + "step": 1164, + "teacher_loss": 0.2576468884944916 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.30154094099998474, + "learning_rate": 5.052768541275119e-06, + "loss": 0.2313, + "step": 1165, + "teacher_loss": 0.22344811260700226 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.23638620972633362, + "learning_rate": 5.057105681653897e-06, + "loss": 0.1495, + "step": 1166, + "teacher_loss": 0.13989299535751343 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.27809587121009827, + "learning_rate": 5.061442822032673e-06, + "loss": 0.2921, + "step": 1167, + "teacher_loss": 0.2936674952507019 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.17922601103782654, + "learning_rate": 5.06577996241145e-06, + "loss": 0.2428, + "step": 1168, + "teacher_loss": 0.2498663365840912 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.4638948440551758, + "learning_rate": 5.070117102790227e-06, + "loss": 0.2533, + "step": 1169, + "teacher_loss": 0.22988645732402802 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.3509288728237152, + "learning_rate": 5.074454243169004e-06, + "loss": 0.2361, + "step": 1170, + "teacher_loss": 0.2233295738697052 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.13288934528827667, + "learning_rate": 5.078791383547781e-06, + "loss": 0.174, + "step": 1171, + "teacher_loss": 0.17857147753238678 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.49124500155448914, + "learning_rate": 5.083128523926558e-06, + "loss": 0.2575, + "step": 1172, + "teacher_loss": 0.23157858848571777 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.7791959047317505, + "learning_rate": 5.087465664305335e-06, + "loss": 0.2826, + "step": 1173, + "teacher_loss": 0.22747664153575897 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.7260560989379883, + "learning_rate": 5.091802804684112e-06, + "loss": 0.3169, + "step": 1174, + "teacher_loss": 0.2714585065841675 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.33840230107307434, + "learning_rate": 5.096139945062889e-06, + "loss": 0.2518, + "step": 1175, + "teacher_loss": 0.24213892221450806 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.2387653887271881, + "learning_rate": 5.100477085441665e-06, + "loss": 0.293, + "step": 1176, + "teacher_loss": 0.29897743463516235 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.2110324203968048, + "learning_rate": 5.104814225820443e-06, + "loss": 0.2154, + "step": 1177, + "teacher_loss": 0.21584412455558777 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.3478757441043854, + "learning_rate": 5.109151366199219e-06, + "loss": 0.2402, + "step": 1178, + "teacher_loss": 0.22825860977172852 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.3471631705760956, + "learning_rate": 5.1134885065779965e-06, + "loss": 0.3092, + "step": 1179, + "teacher_loss": 0.30500495433807373 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.38604047894477844, + "learning_rate": 5.117825646956773e-06, + "loss": 0.206, + "step": 1180, + "teacher_loss": 0.18596185743808746 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.7693082094192505, + "learning_rate": 5.12216278733555e-06, + "loss": 0.3538, + "step": 1181, + "teacher_loss": 0.3076856732368469 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.4554254114627838, + "learning_rate": 5.126499927714327e-06, + "loss": 0.4417, + "step": 1182, + "teacher_loss": 0.4401906132698059 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.3787423074245453, + "learning_rate": 5.1308370680931044e-06, + "loss": 0.1972, + "step": 1183, + "teacher_loss": 0.1769973188638687 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.30436742305755615, + "learning_rate": 5.135174208471881e-06, + "loss": 0.2488, + "step": 1184, + "teacher_loss": 0.2426188439130783 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 1.0952677726745605, + "learning_rate": 5.139511348850658e-06, + "loss": 0.3315, + "step": 1185, + "teacher_loss": 0.24662281572818756 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.6754557490348816, + "learning_rate": 5.143848489229435e-06, + "loss": 0.3952, + "step": 1186, + "teacher_loss": 0.36401450634002686 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.553848385810852, + "learning_rate": 5.1481856296082115e-06, + "loss": 0.2686, + "step": 1187, + "teacher_loss": 0.23694336414337158 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.6661457419395447, + "learning_rate": 5.152522769986989e-06, + "loss": 0.2712, + "step": 1188, + "teacher_loss": 0.22731655836105347 + }, + { + "compression_loss": 0.0, + "epoch": 0.21, + "label_loss": 0.43331173062324524, + "learning_rate": 5.156859910365765e-06, + "loss": 0.2492, + "step": 1189, + "teacher_loss": 0.22869017720222473 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.7166616916656494, + "learning_rate": 5.161197050744543e-06, + "loss": 0.2524, + "step": 1190, + "teacher_loss": 0.2007940113544464 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.4378998875617981, + "learning_rate": 5.1655341911233195e-06, + "loss": 0.3477, + "step": 1191, + "teacher_loss": 0.33770930767059326 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.1930282860994339, + "learning_rate": 5.169871331502096e-06, + "loss": 0.2316, + "step": 1192, + "teacher_loss": 0.2358579933643341 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.25824838876724243, + "learning_rate": 5.174208471880873e-06, + "loss": 0.2358, + "step": 1193, + "teacher_loss": 0.23328590393066406 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.6622394323348999, + "learning_rate": 5.178545612259651e-06, + "loss": 0.3084, + "step": 1194, + "teacher_loss": 0.26912936568260193 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.8537653684616089, + "learning_rate": 5.1828827526384274e-06, + "loss": 0.2982, + "step": 1195, + "teacher_loss": 0.23648113012313843 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.7899073958396912, + "learning_rate": 5.187219893017204e-06, + "loss": 0.3672, + "step": 1196, + "teacher_loss": 0.32022690773010254 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.8777328729629517, + "learning_rate": 5.191557033395981e-06, + "loss": 0.2678, + "step": 1197, + "teacher_loss": 0.19998799264431 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.47831371426582336, + "learning_rate": 5.195894173774758e-06, + "loss": 0.2511, + "step": 1198, + "teacher_loss": 0.22583192586898804 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.2943691909313202, + "learning_rate": 5.200231314153535e-06, + "loss": 0.2539, + "step": 1199, + "teacher_loss": 0.24943341314792633 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.28227388858795166, + "learning_rate": 5.204568454532311e-06, + "loss": 0.2746, + "step": 1200, + "teacher_loss": 0.27375128865242004 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.2694704532623291, + "learning_rate": 5.208905594911089e-06, + "loss": 0.2548, + "step": 1201, + "teacher_loss": 0.25319626927375793 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.356032133102417, + "learning_rate": 5.213242735289866e-06, + "loss": 0.2655, + "step": 1202, + "teacher_loss": 0.2554033398628235 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.21529226005077362, + "learning_rate": 5.2175798756686425e-06, + "loss": 0.2096, + "step": 1203, + "teacher_loss": 0.20893582701683044 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.7583473920822144, + "learning_rate": 5.221917016047419e-06, + "loss": 0.2712, + "step": 1204, + "teacher_loss": 0.2171146720647812 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.3672451674938202, + "learning_rate": 5.226254156426197e-06, + "loss": 0.1939, + "step": 1205, + "teacher_loss": 0.17466257512569427 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.4197983741760254, + "learning_rate": 5.230591296804974e-06, + "loss": 0.2593, + "step": 1206, + "teacher_loss": 0.24141529202461243 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.3416770100593567, + "learning_rate": 5.2349284371837504e-06, + "loss": 0.2444, + "step": 1207, + "teacher_loss": 0.23355409502983093 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.35045433044433594, + "learning_rate": 5.239265577562527e-06, + "loss": 0.1875, + "step": 1208, + "teacher_loss": 0.16938243806362152 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.4604479670524597, + "learning_rate": 5.243602717941304e-06, + "loss": 0.3014, + "step": 1209, + "teacher_loss": 0.2837444543838501 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.35232654213905334, + "learning_rate": 5.247939858320082e-06, + "loss": 0.252, + "step": 1210, + "teacher_loss": 0.24088945984840393 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.6564865112304688, + "learning_rate": 5.2522769986988575e-06, + "loss": 0.2302, + "step": 1211, + "teacher_loss": 0.18283820152282715 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.3088138699531555, + "learning_rate": 5.256614139077635e-06, + "loss": 0.3209, + "step": 1212, + "teacher_loss": 0.32225584983825684 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.6396206617355347, + "learning_rate": 5.260951279456412e-06, + "loss": 0.305, + "step": 1213, + "teacher_loss": 0.2678562104701996 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.6946617364883423, + "learning_rate": 5.265288419835189e-06, + "loss": 0.4101, + "step": 1214, + "teacher_loss": 0.37847232818603516 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.829551100730896, + "learning_rate": 5.2696255602139655e-06, + "loss": 0.2812, + "step": 1215, + "teacher_loss": 0.2202252447605133 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.7917488813400269, + "learning_rate": 5.273962700592742e-06, + "loss": 0.2726, + "step": 1216, + "teacher_loss": 0.21489733457565308 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.2725987434387207, + "learning_rate": 5.27829984097152e-06, + "loss": 0.2404, + "step": 1217, + "teacher_loss": 0.23681502044200897 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.5505666136741638, + "learning_rate": 5.282636981350297e-06, + "loss": 0.2664, + "step": 1218, + "teacher_loss": 0.23480895161628723 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.46690648794174194, + "learning_rate": 5.2869741217290734e-06, + "loss": 0.2156, + "step": 1219, + "teacher_loss": 0.18770459294319153 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.24107155203819275, + "learning_rate": 5.29131126210785e-06, + "loss": 0.1939, + "step": 1220, + "teacher_loss": 0.1887015402317047 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.27591270208358765, + "learning_rate": 5.295648402486628e-06, + "loss": 0.2407, + "step": 1221, + "teacher_loss": 0.2367565929889679 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.5319687128067017, + "learning_rate": 5.299985542865404e-06, + "loss": 0.3498, + "step": 1222, + "teacher_loss": 0.3295632004737854 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.2172059714794159, + "learning_rate": 5.304322683244181e-06, + "loss": 0.3703, + "step": 1223, + "teacher_loss": 0.38734835386276245 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.3539636731147766, + "learning_rate": 5.308659823622958e-06, + "loss": 0.1869, + "step": 1224, + "teacher_loss": 0.16837289929389954 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.3852180540561676, + "learning_rate": 5.312996964001735e-06, + "loss": 0.2492, + "step": 1225, + "teacher_loss": 0.2341034710407257 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.4535210132598877, + "learning_rate": 5.317334104380512e-06, + "loss": 0.227, + "step": 1226, + "teacher_loss": 0.20178814232349396 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.6944804191589355, + "learning_rate": 5.3216712447592885e-06, + "loss": 0.2949, + "step": 1227, + "teacher_loss": 0.25045156478881836 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.5647241473197937, + "learning_rate": 5.326008385138066e-06, + "loss": 0.3082, + "step": 1228, + "teacher_loss": 0.2797269821166992 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.4358232021331787, + "learning_rate": 5.330345525516843e-06, + "loss": 0.3087, + "step": 1229, + "teacher_loss": 0.2945837378501892 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.3156379461288452, + "learning_rate": 5.33468266589562e-06, + "loss": 0.261, + "step": 1230, + "teacher_loss": 0.2549268305301666 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.33211833238601685, + "learning_rate": 5.3390198062743964e-06, + "loss": 0.2429, + "step": 1231, + "teacher_loss": 0.2330237179994583 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.3029274046421051, + "learning_rate": 5.343356946653174e-06, + "loss": 0.2221, + "step": 1232, + "teacher_loss": 0.2131727933883667 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.4400797486305237, + "learning_rate": 5.34769408703195e-06, + "loss": 0.2533, + "step": 1233, + "teacher_loss": 0.23258748650550842 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.3303312063217163, + "learning_rate": 5.352031227410728e-06, + "loss": 0.2428, + "step": 1234, + "teacher_loss": 0.23308271169662476 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.6181873083114624, + "learning_rate": 5.356368367789504e-06, + "loss": 0.287, + "step": 1235, + "teacher_loss": 0.25020280480384827 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.2569912075996399, + "learning_rate": 5.360705508168281e-06, + "loss": 0.1979, + "step": 1236, + "teacher_loss": 0.1913888156414032 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.3329373002052307, + "learning_rate": 5.365042648547058e-06, + "loss": 0.2163, + "step": 1237, + "teacher_loss": 0.20329231023788452 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.19664104282855988, + "learning_rate": 5.369379788925835e-06, + "loss": 0.2012, + "step": 1238, + "teacher_loss": 0.20175495743751526 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.4789338707923889, + "learning_rate": 5.373716929304612e-06, + "loss": 0.194, + "step": 1239, + "teacher_loss": 0.16235429048538208 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.36036866903305054, + "learning_rate": 5.378054069683389e-06, + "loss": 0.2499, + "step": 1240, + "teacher_loss": 0.23765911161899567 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.17101141810417175, + "learning_rate": 5.382391210062166e-06, + "loss": 0.1686, + "step": 1241, + "teacher_loss": 0.16834668815135956 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.5667974948883057, + "learning_rate": 5.386728350440943e-06, + "loss": 0.3674, + "step": 1242, + "teacher_loss": 0.34528452157974243 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.3654319643974304, + "learning_rate": 5.39106549081972e-06, + "loss": 0.2378, + "step": 1243, + "teacher_loss": 0.2235853374004364 + }, + { + "compression_loss": 0.0, + "epoch": 0.22, + "label_loss": 0.13516873121261597, + "learning_rate": 5.395402631198496e-06, + "loss": 0.2272, + "step": 1244, + "teacher_loss": 0.23741930723190308 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.4525141716003418, + "learning_rate": 5.399739771577274e-06, + "loss": 0.2478, + "step": 1245, + "teacher_loss": 0.2250119000673294 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.5897389650344849, + "learning_rate": 5.40407691195605e-06, + "loss": 0.2729, + "step": 1246, + "teacher_loss": 0.23768490552902222 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.38645854592323303, + "learning_rate": 5.408414052334827e-06, + "loss": 0.2134, + "step": 1247, + "teacher_loss": 0.1941990852355957 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.7676335573196411, + "learning_rate": 5.412751192713604e-06, + "loss": 0.3584, + "step": 1248, + "teacher_loss": 0.3129536807537079 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.3984995484352112, + "learning_rate": 5.417088333092381e-06, + "loss": 0.2197, + "step": 1249, + "teacher_loss": 0.1997983753681183 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.31520527601242065, + "learning_rate": 5.4214254734711585e-06, + "loss": 0.1779, + "step": 1250, + "teacher_loss": 0.162623792886734 + }, + { + "epoch": 0.23, + "eval_exact_match": 79.85808893093662, + "eval_f1": 87.24254571758037, + "step": 1250 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.39510267972946167, + "learning_rate": 5.425762613849935e-06, + "loss": 0.3035, + "step": 1251, + "teacher_loss": 0.29329121112823486 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.5366889834403992, + "learning_rate": 5.430099754228712e-06, + "loss": 0.2783, + "step": 1252, + "teacher_loss": 0.2495642602443695 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.598315954208374, + "learning_rate": 5.434436894607489e-06, + "loss": 0.3065, + "step": 1253, + "teacher_loss": 0.27412959933280945 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.4712716341018677, + "learning_rate": 5.4387740349862665e-06, + "loss": 0.4155, + "step": 1254, + "teacher_loss": 0.40935713052749634 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.5349586606025696, + "learning_rate": 5.443111175365042e-06, + "loss": 0.2506, + "step": 1255, + "teacher_loss": 0.21899788081645966 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.3278942406177521, + "learning_rate": 5.44744831574382e-06, + "loss": 0.1678, + "step": 1256, + "teacher_loss": 0.14998933672904968 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.20256701111793518, + "learning_rate": 5.451785456122596e-06, + "loss": 0.2116, + "step": 1257, + "teacher_loss": 0.21258941292762756 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.11660751700401306, + "learning_rate": 5.456122596501374e-06, + "loss": 0.2559, + "step": 1258, + "teacher_loss": 0.27139660716056824 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.46190154552459717, + "learning_rate": 5.46045973688015e-06, + "loss": 0.2599, + "step": 1259, + "teacher_loss": 0.23741409182548523 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.2473524510860443, + "learning_rate": 5.464796877258927e-06, + "loss": 0.1778, + "step": 1260, + "teacher_loss": 0.17002329230308533 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.4944515824317932, + "learning_rate": 5.469134017637705e-06, + "loss": 0.2978, + "step": 1261, + "teacher_loss": 0.2759248614311218 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.8135677576065063, + "learning_rate": 5.4734711580164815e-06, + "loss": 0.3457, + "step": 1262, + "teacher_loss": 0.2936977446079254 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.23547472059726715, + "learning_rate": 5.477808298395258e-06, + "loss": 0.257, + "step": 1263, + "teacher_loss": 0.25939512252807617 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.5694279074668884, + "learning_rate": 5.482145438774035e-06, + "loss": 0.3007, + "step": 1264, + "teacher_loss": 0.2708965837955475 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.5643197298049927, + "learning_rate": 5.486482579152812e-06, + "loss": 0.3878, + "step": 1265, + "teacher_loss": 0.3681699335575104 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.4379342794418335, + "learning_rate": 5.490819719531589e-06, + "loss": 0.2588, + "step": 1266, + "teacher_loss": 0.23889079689979553 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.2910255789756775, + "learning_rate": 5.495156859910366e-06, + "loss": 0.2604, + "step": 1267, + "teacher_loss": 0.2569883465766907 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.16718003153800964, + "learning_rate": 5.499494000289142e-06, + "loss": 0.2275, + "step": 1268, + "teacher_loss": 0.23424017429351807 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.4267197251319885, + "learning_rate": 5.50383114066792e-06, + "loss": 0.2382, + "step": 1269, + "teacher_loss": 0.21722105145454407 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.1430625021457672, + "learning_rate": 5.508168281046697e-06, + "loss": 0.193, + "step": 1270, + "teacher_loss": 0.19852329790592194 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.45203936100006104, + "learning_rate": 5.512505421425473e-06, + "loss": 0.2597, + "step": 1271, + "teacher_loss": 0.23832622170448303 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.624198317527771, + "learning_rate": 5.516842561804251e-06, + "loss": 0.2792, + "step": 1272, + "teacher_loss": 0.24090111255645752 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.2790127992630005, + "learning_rate": 5.521179702183028e-06, + "loss": 0.1946, + "step": 1273, + "teacher_loss": 0.18517965078353882 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.5398377776145935, + "learning_rate": 5.5255168425618045e-06, + "loss": 0.2293, + "step": 1274, + "teacher_loss": 0.19480839371681213 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.5422208309173584, + "learning_rate": 5.529853982940581e-06, + "loss": 0.3687, + "step": 1275, + "teacher_loss": 0.349415123462677 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.6231808066368103, + "learning_rate": 5.534191123319358e-06, + "loss": 0.2541, + "step": 1276, + "teacher_loss": 0.21312439441680908 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.4202580451965332, + "learning_rate": 5.538528263698135e-06, + "loss": 0.236, + "step": 1277, + "teacher_loss": 0.2155061662197113 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.29901033639907837, + "learning_rate": 5.5428654040769125e-06, + "loss": 0.2933, + "step": 1278, + "teacher_loss": 0.2926676869392395 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.5168792605400085, + "learning_rate": 5.547202544455688e-06, + "loss": 0.2652, + "step": 1279, + "teacher_loss": 0.23727889358997345 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.31214818358421326, + "learning_rate": 5.551539684834466e-06, + "loss": 0.2335, + "step": 1280, + "teacher_loss": 0.2247873991727829 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.4879359304904938, + "learning_rate": 5.555876825213243e-06, + "loss": 0.2205, + "step": 1281, + "teacher_loss": 0.1908344328403473 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.14473803341388702, + "learning_rate": 5.56021396559202e-06, + "loss": 0.1511, + "step": 1282, + "teacher_loss": 0.1518411785364151 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.2977965474128723, + "learning_rate": 5.564551105970797e-06, + "loss": 0.1923, + "step": 1283, + "teacher_loss": 0.18056859076023102 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.38523414731025696, + "learning_rate": 5.568888246349574e-06, + "loss": 0.2626, + "step": 1284, + "teacher_loss": 0.24892790615558624 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.6362987756729126, + "learning_rate": 5.573225386728351e-06, + "loss": 0.3375, + "step": 1285, + "teacher_loss": 0.30428051948547363 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.1684516966342926, + "learning_rate": 5.5775625271071275e-06, + "loss": 0.19, + "step": 1286, + "teacher_loss": 0.19237330555915833 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.3424099087715149, + "learning_rate": 5.581899667485904e-06, + "loss": 0.2258, + "step": 1287, + "teacher_loss": 0.21282604336738586 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.6559836864471436, + "learning_rate": 5.586236807864681e-06, + "loss": 0.3504, + "step": 1288, + "teacher_loss": 0.31644487380981445 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.24155014753341675, + "learning_rate": 5.590573948243459e-06, + "loss": 0.2019, + "step": 1289, + "teacher_loss": 0.197509303689003 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.36160242557525635, + "learning_rate": 5.594911088622235e-06, + "loss": 0.1629, + "step": 1290, + "teacher_loss": 0.14078977704048157 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.20559725165367126, + "learning_rate": 5.599248229001012e-06, + "loss": 0.1684, + "step": 1291, + "teacher_loss": 0.16431768238544464 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.37003186345100403, + "learning_rate": 5.603585369379789e-06, + "loss": 0.3106, + "step": 1292, + "teacher_loss": 0.3040315508842468 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.23484672605991364, + "learning_rate": 5.607922509758566e-06, + "loss": 0.2924, + "step": 1293, + "teacher_loss": 0.2988301217556 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.8479714393615723, + "learning_rate": 5.612259650137343e-06, + "loss": 0.4411, + "step": 1294, + "teacher_loss": 0.3958855867385864 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.5910420417785645, + "learning_rate": 5.616596790516119e-06, + "loss": 0.2619, + "step": 1295, + "teacher_loss": 0.22527837753295898 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.3322191834449768, + "learning_rate": 5.620933930894897e-06, + "loss": 0.2361, + "step": 1296, + "teacher_loss": 0.2253904938697815 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.39758002758026123, + "learning_rate": 5.625271071273674e-06, + "loss": 0.2863, + "step": 1297, + "teacher_loss": 0.273922324180603 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.17350620031356812, + "learning_rate": 5.6296082116524505e-06, + "loss": 0.2141, + "step": 1298, + "teacher_loss": 0.21862801909446716 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.3849458694458008, + "learning_rate": 5.633945352031227e-06, + "loss": 0.2712, + "step": 1299, + "teacher_loss": 0.2586025595664978 + }, + { + "compression_loss": 0.0, + "epoch": 0.23, + "label_loss": 0.5989031791687012, + "learning_rate": 5.638282492410005e-06, + "loss": 0.2403, + "step": 1300, + "teacher_loss": 0.2005019187927246 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.8836695551872253, + "learning_rate": 5.642619632788781e-06, + "loss": 0.3321, + "step": 1301, + "teacher_loss": 0.27083802223205566 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.4654373526573181, + "learning_rate": 5.6469567731675585e-06, + "loss": 0.2394, + "step": 1302, + "teacher_loss": 0.2142777144908905 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.2677880823612213, + "learning_rate": 5.651293913546335e-06, + "loss": 0.2512, + "step": 1303, + "teacher_loss": 0.2493935525417328 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.08647237718105316, + "learning_rate": 5.655631053925112e-06, + "loss": 0.2215, + "step": 1304, + "teacher_loss": 0.23650875687599182 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.4774653911590576, + "learning_rate": 5.65996819430389e-06, + "loss": 0.2775, + "step": 1305, + "teacher_loss": 0.2552984356880188 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.23576687276363373, + "learning_rate": 5.6643053346826656e-06, + "loss": 0.1949, + "step": 1306, + "teacher_loss": 0.19039157032966614 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.2447567880153656, + "learning_rate": 5.668642475061443e-06, + "loss": 0.3084, + "step": 1307, + "teacher_loss": 0.3154921233654022 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.35558998584747314, + "learning_rate": 5.67297961544022e-06, + "loss": 0.2093, + "step": 1308, + "teacher_loss": 0.1930309236049652 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.7455028295516968, + "learning_rate": 5.677316755818997e-06, + "loss": 0.2965, + "step": 1309, + "teacher_loss": 0.24666452407836914 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.2909244894981384, + "learning_rate": 5.6816538961977735e-06, + "loss": 0.3713, + "step": 1310, + "teacher_loss": 0.3802812993526459 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.2900809347629547, + "learning_rate": 5.685991036576551e-06, + "loss": 0.222, + "step": 1311, + "teacher_loss": 0.2144090235233307 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.5351499319076538, + "learning_rate": 5.690328176955327e-06, + "loss": 0.2254, + "step": 1312, + "teacher_loss": 0.19096694886684418 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.5194671154022217, + "learning_rate": 5.694665317334105e-06, + "loss": 0.2511, + "step": 1313, + "teacher_loss": 0.22131946682929993 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.5641889572143555, + "learning_rate": 5.6990024577128815e-06, + "loss": 0.2561, + "step": 1314, + "teacher_loss": 0.22190842032432556 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.13668163120746613, + "learning_rate": 5.703339598091658e-06, + "loss": 0.1915, + "step": 1315, + "teacher_loss": 0.19760063290596008 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.5113697648048401, + "learning_rate": 5.707676738470436e-06, + "loss": 0.3508, + "step": 1316, + "teacher_loss": 0.33297199010849 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.5250779390335083, + "learning_rate": 5.712013878849212e-06, + "loss": 0.2526, + "step": 1317, + "teacher_loss": 0.22234031558036804 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.23730963468551636, + "learning_rate": 5.716351019227989e-06, + "loss": 0.3092, + "step": 1318, + "teacher_loss": 0.3171396851539612 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.31640592217445374, + "learning_rate": 5.720688159606766e-06, + "loss": 0.258, + "step": 1319, + "teacher_loss": 0.2515076994895935 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.5580583810806274, + "learning_rate": 5.725025299985543e-06, + "loss": 0.2585, + "step": 1320, + "teacher_loss": 0.22516728937625885 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.46960604190826416, + "learning_rate": 5.72936244036432e-06, + "loss": 0.2795, + "step": 1321, + "teacher_loss": 0.2584110498428345 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.39644157886505127, + "learning_rate": 5.733699580743097e-06, + "loss": 0.2545, + "step": 1322, + "teacher_loss": 0.23873931169509888 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.2446393072605133, + "learning_rate": 5.738036721121873e-06, + "loss": 0.2173, + "step": 1323, + "teacher_loss": 0.2142748087644577 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.4224982261657715, + "learning_rate": 5.742373861500651e-06, + "loss": 0.224, + "step": 1324, + "teacher_loss": 0.20190811157226562 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.42481768131256104, + "learning_rate": 5.746711001879428e-06, + "loss": 0.3055, + "step": 1325, + "teacher_loss": 0.292196124792099 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.4198145866394043, + "learning_rate": 5.7510481422582045e-06, + "loss": 0.2451, + "step": 1326, + "teacher_loss": 0.22565031051635742 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.9430245161056519, + "learning_rate": 5.755385282636982e-06, + "loss": 0.2495, + "step": 1327, + "teacher_loss": 0.17241689562797546 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.7390469908714294, + "learning_rate": 5.759722423015758e-06, + "loss": 0.2744, + "step": 1328, + "teacher_loss": 0.22279001772403717 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 1.0234602689743042, + "learning_rate": 5.764059563394536e-06, + "loss": 0.3094, + "step": 1329, + "teacher_loss": 0.23007167875766754 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.5311638116836548, + "learning_rate": 5.768396703773312e-06, + "loss": 0.2495, + "step": 1330, + "teacher_loss": 0.2181762158870697 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.625234842300415, + "learning_rate": 5.772733844152089e-06, + "loss": 0.3367, + "step": 1331, + "teacher_loss": 0.304587721824646 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.4760865271091461, + "learning_rate": 5.777070984530866e-06, + "loss": 0.3806, + "step": 1332, + "teacher_loss": 0.3700268268585205 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.5296129584312439, + "learning_rate": 5.7814081249096436e-06, + "loss": 0.2602, + "step": 1333, + "teacher_loss": 0.23023159801959991 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.1823360174894333, + "learning_rate": 5.7857452652884195e-06, + "loss": 0.214, + "step": 1334, + "teacher_loss": 0.21756044030189514 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.3620914816856384, + "learning_rate": 5.790082405667197e-06, + "loss": 0.2632, + "step": 1335, + "teacher_loss": 0.25224554538726807 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.28975558280944824, + "learning_rate": 5.794419546045974e-06, + "loss": 0.196, + "step": 1336, + "teacher_loss": 0.18557213246822357 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.3370620012283325, + "learning_rate": 5.798756686424751e-06, + "loss": 0.2489, + "step": 1337, + "teacher_loss": 0.23909465968608856 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.3273800015449524, + "learning_rate": 5.803093826803528e-06, + "loss": 0.2047, + "step": 1338, + "teacher_loss": 0.19109182059764862 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.4376048743724823, + "learning_rate": 5.807430967182304e-06, + "loss": 0.3761, + "step": 1339, + "teacher_loss": 0.3692399263381958 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.3818703889846802, + "learning_rate": 5.811768107561082e-06, + "loss": 0.3125, + "step": 1340, + "teacher_loss": 0.30478206276893616 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.7969062328338623, + "learning_rate": 5.816105247939859e-06, + "loss": 0.3746, + "step": 1341, + "teacher_loss": 0.32772839069366455 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.5678620934486389, + "learning_rate": 5.820442388318635e-06, + "loss": 0.3341, + "step": 1342, + "teacher_loss": 0.3081613779067993 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.4068654179573059, + "learning_rate": 5.824779528697412e-06, + "loss": 0.2339, + "step": 1343, + "teacher_loss": 0.21470209956169128 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.5724692344665527, + "learning_rate": 5.829116669076189e-06, + "loss": 0.3003, + "step": 1344, + "teacher_loss": 0.27004462480545044 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.685820460319519, + "learning_rate": 5.833453809454966e-06, + "loss": 0.2911, + "step": 1345, + "teacher_loss": 0.2472207099199295 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.22002211213111877, + "learning_rate": 5.837790949833743e-06, + "loss": 0.1884, + "step": 1346, + "teacher_loss": 0.18486765027046204 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.2986704111099243, + "learning_rate": 5.84212809021252e-06, + "loss": 0.2203, + "step": 1347, + "teacher_loss": 0.21157175302505493 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.554895281791687, + "learning_rate": 5.846465230591297e-06, + "loss": 0.2153, + "step": 1348, + "teacher_loss": 0.17756152153015137 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.15596388280391693, + "learning_rate": 5.8508023709700745e-06, + "loss": 0.1756, + "step": 1349, + "teacher_loss": 0.17774304747581482 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.15273180603981018, + "learning_rate": 5.8551395113488504e-06, + "loss": 0.1577, + "step": 1350, + "teacher_loss": 0.15828032791614532 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.20243240892887115, + "learning_rate": 5.859476651727628e-06, + "loss": 0.1632, + "step": 1351, + "teacher_loss": 0.1588045060634613 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.19009071588516235, + "learning_rate": 5.863813792106405e-06, + "loss": 0.2208, + "step": 1352, + "teacher_loss": 0.22426247596740723 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.43891090154647827, + "learning_rate": 5.868150932485182e-06, + "loss": 0.4042, + "step": 1353, + "teacher_loss": 0.40037986636161804 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.4940303564071655, + "learning_rate": 5.872488072863958e-06, + "loss": 0.2474, + "step": 1354, + "teacher_loss": 0.21997055411338806 + }, + { + "compression_loss": 0.0, + "epoch": 0.24, + "label_loss": 0.6147751808166504, + "learning_rate": 5.876825213242735e-06, + "loss": 0.2412, + "step": 1355, + "teacher_loss": 0.19964075088500977 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.4485546946525574, + "learning_rate": 5.881162353621512e-06, + "loss": 0.2457, + "step": 1356, + "teacher_loss": 0.22310753166675568 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.6731774806976318, + "learning_rate": 5.8854994940002896e-06, + "loss": 0.285, + "step": 1357, + "teacher_loss": 0.24181930720806122 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.7686455845832825, + "learning_rate": 5.889836634379066e-06, + "loss": 0.3114, + "step": 1358, + "teacher_loss": 0.26064521074295044 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 1.2206658124923706, + "learning_rate": 5.894173774757843e-06, + "loss": 0.4777, + "step": 1359, + "teacher_loss": 0.3951765298843384 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.3752766251564026, + "learning_rate": 5.898510915136621e-06, + "loss": 0.378, + "step": 1360, + "teacher_loss": 0.3783051073551178 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.48867619037628174, + "learning_rate": 5.902848055515397e-06, + "loss": 0.3041, + "step": 1361, + "teacher_loss": 0.28364354372024536 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.8336423635482788, + "learning_rate": 5.907185195894174e-06, + "loss": 0.306, + "step": 1362, + "teacher_loss": 0.2473602145910263 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.4570959508419037, + "learning_rate": 5.91152233627295e-06, + "loss": 0.3004, + "step": 1363, + "teacher_loss": 0.282977819442749 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.31784719228744507, + "learning_rate": 5.915859476651728e-06, + "loss": 0.2566, + "step": 1364, + "teacher_loss": 0.24979303777217865 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.7443061470985413, + "learning_rate": 5.920196617030505e-06, + "loss": 0.2679, + "step": 1365, + "teacher_loss": 0.21493223309516907 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.2886722683906555, + "learning_rate": 5.924533757409281e-06, + "loss": 0.3029, + "step": 1366, + "teacher_loss": 0.3044867515563965 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.3999277949333191, + "learning_rate": 5.928870897788058e-06, + "loss": 0.226, + "step": 1367, + "teacher_loss": 0.2066381871700287 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.6261770725250244, + "learning_rate": 5.933208038166836e-06, + "loss": 0.383, + "step": 1368, + "teacher_loss": 0.35600489377975464 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.5161714553833008, + "learning_rate": 5.9375451785456126e-06, + "loss": 0.3483, + "step": 1369, + "teacher_loss": 0.3296029269695282 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.41568416357040405, + "learning_rate": 5.941882318924389e-06, + "loss": 0.245, + "step": 1370, + "teacher_loss": 0.22598525881767273 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.7609728574752808, + "learning_rate": 5.946219459303167e-06, + "loss": 0.3333, + "step": 1371, + "teacher_loss": 0.28575897216796875 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.2975355386734009, + "learning_rate": 5.950556599681943e-06, + "loss": 0.1979, + "step": 1372, + "teacher_loss": 0.18677443265914917 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.45459914207458496, + "learning_rate": 5.9548937400607205e-06, + "loss": 0.2376, + "step": 1373, + "teacher_loss": 0.21353699266910553 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.6039212346076965, + "learning_rate": 5.9592308804394964e-06, + "loss": 0.399, + "step": 1374, + "teacher_loss": 0.37624669075012207 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.42793986201286316, + "learning_rate": 5.963568020818274e-06, + "loss": 0.2062, + "step": 1375, + "teacher_loss": 0.18154001235961914 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.41243982315063477, + "learning_rate": 5.967905161197051e-06, + "loss": 0.2315, + "step": 1376, + "teacher_loss": 0.21144677698612213 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.30396029353141785, + "learning_rate": 5.972242301575828e-06, + "loss": 0.2223, + "step": 1377, + "teacher_loss": 0.21323281526565552 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.5517523288726807, + "learning_rate": 5.976579441954604e-06, + "loss": 0.2223, + "step": 1378, + "teacher_loss": 0.1857045292854309 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.39856451749801636, + "learning_rate": 5.980916582333382e-06, + "loss": 0.26, + "step": 1379, + "teacher_loss": 0.2445628046989441 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.2551502585411072, + "learning_rate": 5.985253722712159e-06, + "loss": 0.2171, + "step": 1380, + "teacher_loss": 0.21285447478294373 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.40075159072875977, + "learning_rate": 5.9895908630909356e-06, + "loss": 0.2789, + "step": 1381, + "teacher_loss": 0.26538708806037903 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.15860755741596222, + "learning_rate": 5.993928003469713e-06, + "loss": 0.1825, + "step": 1382, + "teacher_loss": 0.1851159930229187 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.22349731624126434, + "learning_rate": 5.998265143848489e-06, + "loss": 0.2449, + "step": 1383, + "teacher_loss": 0.2472614198923111 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.9152520895004272, + "learning_rate": 6.002602284227267e-06, + "loss": 0.4403, + "step": 1384, + "teacher_loss": 0.38753193616867065 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.2609570026397705, + "learning_rate": 6.006939424606043e-06, + "loss": 0.244, + "step": 1385, + "teacher_loss": 0.2420898824930191 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.253960520029068, + "learning_rate": 6.01127656498482e-06, + "loss": 0.275, + "step": 1386, + "teacher_loss": 0.27731263637542725 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.4052412211894989, + "learning_rate": 6.015613705363597e-06, + "loss": 0.2549, + "step": 1387, + "teacher_loss": 0.23824575543403625 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.5720700621604919, + "learning_rate": 6.019950845742374e-06, + "loss": 0.2996, + "step": 1388, + "teacher_loss": 0.2693380117416382 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.4965176582336426, + "learning_rate": 6.024287986121151e-06, + "loss": 0.3023, + "step": 1389, + "teacher_loss": 0.2807462513446808 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.8628653287887573, + "learning_rate": 6.028625126499928e-06, + "loss": 0.2695, + "step": 1390, + "teacher_loss": 0.20353132486343384 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.4072635769844055, + "learning_rate": 6.032962266878705e-06, + "loss": 0.3392, + "step": 1391, + "teacher_loss": 0.33163323998451233 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.6146756410598755, + "learning_rate": 6.037299407257482e-06, + "loss": 0.2608, + "step": 1392, + "teacher_loss": 0.22149290144443512 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.21091735363006592, + "learning_rate": 6.0416365476362585e-06, + "loss": 0.2156, + "step": 1393, + "teacher_loss": 0.21609428524971008 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.43321549892425537, + "learning_rate": 6.045973688015035e-06, + "loss": 0.2462, + "step": 1394, + "teacher_loss": 0.22545374929904938 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.4395361542701721, + "learning_rate": 6.050310828393813e-06, + "loss": 0.4239, + "step": 1395, + "teacher_loss": 0.42217665910720825 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.3347514271736145, + "learning_rate": 6.054647968772589e-06, + "loss": 0.2103, + "step": 1396, + "teacher_loss": 0.19650761783123016 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.29272395372390747, + "learning_rate": 6.0589851091513665e-06, + "loss": 0.244, + "step": 1397, + "teacher_loss": 0.23856772482395172 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.3474947512149811, + "learning_rate": 6.063322249530143e-06, + "loss": 0.1883, + "step": 1398, + "teacher_loss": 0.1706659495830536 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.39376091957092285, + "learning_rate": 6.06765938990892e-06, + "loss": 0.2387, + "step": 1399, + "teacher_loss": 0.22142837941646576 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.1845490038394928, + "learning_rate": 6.071996530287697e-06, + "loss": 0.2102, + "step": 1400, + "teacher_loss": 0.2130938470363617 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.24443325400352478, + "learning_rate": 6.0763336706664744e-06, + "loss": 0.1888, + "step": 1401, + "teacher_loss": 0.18261878192424774 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.4610665440559387, + "learning_rate": 6.080670811045251e-06, + "loss": 0.2678, + "step": 1402, + "teacher_loss": 0.2463333010673523 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.2577584385871887, + "learning_rate": 6.085007951424028e-06, + "loss": 0.1618, + "step": 1403, + "teacher_loss": 0.1511085033416748 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.7570144534111023, + "learning_rate": 6.089345091802805e-06, + "loss": 0.4051, + "step": 1404, + "teacher_loss": 0.36602944135665894 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.5082919001579285, + "learning_rate": 6.0936822321815815e-06, + "loss": 0.3033, + "step": 1405, + "teacher_loss": 0.28048452734947205 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.3867013156414032, + "learning_rate": 6.098019372560359e-06, + "loss": 0.3065, + "step": 1406, + "teacher_loss": 0.29753541946411133 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.42510563135147095, + "learning_rate": 6.102356512939135e-06, + "loss": 0.2138, + "step": 1407, + "teacher_loss": 0.1903550922870636 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.4607405662536621, + "learning_rate": 6.106693653317913e-06, + "loss": 0.2426, + "step": 1408, + "teacher_loss": 0.21835315227508545 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.5637032985687256, + "learning_rate": 6.1110307936966895e-06, + "loss": 0.2444, + "step": 1409, + "teacher_loss": 0.20892012119293213 + }, + { + "compression_loss": 0.0, + "epoch": 0.25, + "label_loss": 0.41851964592933655, + "learning_rate": 6.115367934075466e-06, + "loss": 0.3185, + "step": 1410, + "teacher_loss": 0.3073967695236206 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.4464087188243866, + "learning_rate": 6.119705074454243e-06, + "loss": 0.2377, + "step": 1411, + "teacher_loss": 0.21449559926986694 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.35517561435699463, + "learning_rate": 6.12404221483302e-06, + "loss": 0.2409, + "step": 1412, + "teacher_loss": 0.22819578647613525 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.5694050788879395, + "learning_rate": 6.1283793552117974e-06, + "loss": 0.3702, + "step": 1413, + "teacher_loss": 0.34801793098449707 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.9597104787826538, + "learning_rate": 6.132716495590574e-06, + "loss": 0.2499, + "step": 1414, + "teacher_loss": 0.17097695171833038 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.34097692370414734, + "learning_rate": 6.137053635969351e-06, + "loss": 0.2533, + "step": 1415, + "teacher_loss": 0.24356834590435028 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 1.5129200220108032, + "learning_rate": 6.141390776348128e-06, + "loss": 0.3439, + "step": 1416, + "teacher_loss": 0.21406057476997375 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.4174276888370514, + "learning_rate": 6.145727916726905e-06, + "loss": 0.2405, + "step": 1417, + "teacher_loss": 0.2208547592163086 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.5416680574417114, + "learning_rate": 6.150065057105681e-06, + "loss": 0.2827, + "step": 1418, + "teacher_loss": 0.25397396087646484 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.6673641204833984, + "learning_rate": 6.154402197484459e-06, + "loss": 0.3037, + "step": 1419, + "teacher_loss": 0.2632533311843872 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.08588457852602005, + "learning_rate": 6.158739337863236e-06, + "loss": 0.1625, + "step": 1420, + "teacher_loss": 0.17100778222084045 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.8911499977111816, + "learning_rate": 6.1630764782420125e-06, + "loss": 0.3628, + "step": 1421, + "teacher_loss": 0.30414801836013794 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.6807798147201538, + "learning_rate": 6.167413618620789e-06, + "loss": 0.2802, + "step": 1422, + "teacher_loss": 0.23572000861167908 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.5705142021179199, + "learning_rate": 6.171750758999566e-06, + "loss": 0.3701, + "step": 1423, + "teacher_loss": 0.34780365228652954 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.42614126205444336, + "learning_rate": 6.176087899378344e-06, + "loss": 0.2305, + "step": 1424, + "teacher_loss": 0.20878547430038452 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.2949012815952301, + "learning_rate": 6.1804250397571204e-06, + "loss": 0.2294, + "step": 1425, + "teacher_loss": 0.2221618890762329 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.16361354291439056, + "learning_rate": 6.184762180135897e-06, + "loss": 0.1988, + "step": 1426, + "teacher_loss": 0.20274531841278076 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.5443438291549683, + "learning_rate": 6.189099320514674e-06, + "loss": 0.2351, + "step": 1427, + "teacher_loss": 0.2007354199886322 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.44500407576560974, + "learning_rate": 6.193436460893452e-06, + "loss": 0.336, + "step": 1428, + "teacher_loss": 0.32389694452285767 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.9601039886474609, + "learning_rate": 6.1977736012722275e-06, + "loss": 0.5154, + "step": 1429, + "teacher_loss": 0.465933620929718 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.2091706395149231, + "learning_rate": 6.202110741651005e-06, + "loss": 0.2222, + "step": 1430, + "teacher_loss": 0.22361059486865997 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.3767629861831665, + "learning_rate": 6.206447882029782e-06, + "loss": 0.247, + "step": 1431, + "teacher_loss": 0.2325373888015747 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.41331759095191956, + "learning_rate": 6.210785022408559e-06, + "loss": 0.2803, + "step": 1432, + "teacher_loss": 0.2655293941497803 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.5075310468673706, + "learning_rate": 6.2151221627873355e-06, + "loss": 0.292, + "step": 1433, + "teacher_loss": 0.26809728145599365 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 1.3871757984161377, + "learning_rate": 6.219459303166112e-06, + "loss": 0.421, + "step": 1434, + "teacher_loss": 0.3136047124862671 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.23288634419441223, + "learning_rate": 6.22379644354489e-06, + "loss": 0.2456, + "step": 1435, + "teacher_loss": 0.24706783890724182 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.24160626530647278, + "learning_rate": 6.228133583923667e-06, + "loss": 0.23, + "step": 1436, + "teacher_loss": 0.2286582887172699 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.47641247510910034, + "learning_rate": 6.2324707243024434e-06, + "loss": 0.2617, + "step": 1437, + "teacher_loss": 0.2377924919128418 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.2589383125305176, + "learning_rate": 6.23680786468122e-06, + "loss": 0.226, + "step": 1438, + "teacher_loss": 0.22230902314186096 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.4442060589790344, + "learning_rate": 6.241145005059998e-06, + "loss": 0.2889, + "step": 1439, + "teacher_loss": 0.2716790437698364 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.5592027902603149, + "learning_rate": 6.245482145438774e-06, + "loss": 0.3425, + "step": 1440, + "teacher_loss": 0.3184077739715576 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.44950759410858154, + "learning_rate": 6.249819285817551e-06, + "loss": 0.2074, + "step": 1441, + "teacher_loss": 0.18050439655780792 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.6417919397354126, + "learning_rate": 6.254156426196327e-06, + "loss": 0.2697, + "step": 1442, + "teacher_loss": 0.22840999066829681 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.3183668851852417, + "learning_rate": 6.258493566575105e-06, + "loss": 0.2171, + "step": 1443, + "teacher_loss": 0.2058173418045044 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.251525342464447, + "learning_rate": 6.262830706953882e-06, + "loss": 0.2528, + "step": 1444, + "teacher_loss": 0.25295931100845337 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.6930206418037415, + "learning_rate": 6.2671678473326585e-06, + "loss": 0.3206, + "step": 1445, + "teacher_loss": 0.27924779057502747 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.839125394821167, + "learning_rate": 6.271504987711436e-06, + "loss": 0.3106, + "step": 1446, + "teacher_loss": 0.25189632177352905 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.583656370639801, + "learning_rate": 6.275842128090213e-06, + "loss": 0.2614, + "step": 1447, + "teacher_loss": 0.22556567192077637 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.15846890211105347, + "learning_rate": 6.28017926846899e-06, + "loss": 0.2701, + "step": 1448, + "teacher_loss": 0.2825484871864319 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.46844011545181274, + "learning_rate": 6.284516408847766e-06, + "loss": 0.1997, + "step": 1449, + "teacher_loss": 0.16980135440826416 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.2734919786453247, + "learning_rate": 6.288853549226544e-06, + "loss": 0.1866, + "step": 1450, + "teacher_loss": 0.17698529362678528 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 1.0001075267791748, + "learning_rate": 6.29319068960532e-06, + "loss": 0.439, + "step": 1451, + "teacher_loss": 0.37663692235946655 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.5742713212966919, + "learning_rate": 6.297527829984098e-06, + "loss": 0.3119, + "step": 1452, + "teacher_loss": 0.28274089097976685 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.13663902878761292, + "learning_rate": 6.3018649703628735e-06, + "loss": 0.1616, + "step": 1453, + "teacher_loss": 0.16433680057525635 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.42710593342781067, + "learning_rate": 6.306202110741651e-06, + "loss": 0.1928, + "step": 1454, + "teacher_loss": 0.16672807931900024 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.21836897730827332, + "learning_rate": 6.310539251120428e-06, + "loss": 0.2175, + "step": 1455, + "teacher_loss": 0.21740569174289703 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.3814011514186859, + "learning_rate": 6.314876391499205e-06, + "loss": 0.2061, + "step": 1456, + "teacher_loss": 0.18667420744895935 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.3604958951473236, + "learning_rate": 6.319213531877982e-06, + "loss": 0.2128, + "step": 1457, + "teacher_loss": 0.19633695483207703 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.3024424910545349, + "learning_rate": 6.323550672256759e-06, + "loss": 0.2196, + "step": 1458, + "teacher_loss": 0.21035520732402802 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.8184963464736938, + "learning_rate": 6.327887812635536e-06, + "loss": 0.2672, + "step": 1459, + "teacher_loss": 0.20598170161247253 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.5430244207382202, + "learning_rate": 6.332224953014313e-06, + "loss": 0.263, + "step": 1460, + "teacher_loss": 0.23191845417022705 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.25123992562294006, + "learning_rate": 6.33656209339309e-06, + "loss": 0.2486, + "step": 1461, + "teacher_loss": 0.24826672673225403 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.46620747447013855, + "learning_rate": 6.340899233771866e-06, + "loss": 0.294, + "step": 1462, + "teacher_loss": 0.2748807668685913 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.2556838095188141, + "learning_rate": 6.345236374150644e-06, + "loss": 0.3202, + "step": 1463, + "teacher_loss": 0.3273867964744568 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.4673628807067871, + "learning_rate": 6.34957351452942e-06, + "loss": 0.3111, + "step": 1464, + "teacher_loss": 0.293710857629776 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.28518030047416687, + "learning_rate": 6.353910654908197e-06, + "loss": 0.1844, + "step": 1465, + "teacher_loss": 0.17314721643924713 + }, + { + "compression_loss": 0.0, + "epoch": 0.26, + "label_loss": 0.2275487780570984, + "learning_rate": 6.358247795286975e-06, + "loss": 0.2176, + "step": 1466, + "teacher_loss": 0.21654535830020905 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.7746908068656921, + "learning_rate": 6.362584935665751e-06, + "loss": 0.308, + "step": 1467, + "teacher_loss": 0.256172776222229 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.33609867095947266, + "learning_rate": 6.3669220760445285e-06, + "loss": 0.2475, + "step": 1468, + "teacher_loss": 0.23766066133975983 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.42383456230163574, + "learning_rate": 6.371259216423305e-06, + "loss": 0.2354, + "step": 1469, + "teacher_loss": 0.2144349068403244 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.23633140325546265, + "learning_rate": 6.375596356802082e-06, + "loss": 0.2335, + "step": 1470, + "teacher_loss": 0.23315949738025665 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.5437577962875366, + "learning_rate": 6.379933497180859e-06, + "loss": 0.2238, + "step": 1471, + "teacher_loss": 0.18822576105594635 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.3644137978553772, + "learning_rate": 6.384270637559636e-06, + "loss": 0.1943, + "step": 1472, + "teacher_loss": 0.17536523938179016 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.7902100682258606, + "learning_rate": 6.388607777938412e-06, + "loss": 0.2723, + "step": 1473, + "teacher_loss": 0.2147947996854782 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.9066468477249146, + "learning_rate": 6.39294491831719e-06, + "loss": 0.3154, + "step": 1474, + "teacher_loss": 0.24971899390220642 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.5707660913467407, + "learning_rate": 6.397282058695966e-06, + "loss": 0.3259, + "step": 1475, + "teacher_loss": 0.2986467480659485 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.39851319789886475, + "learning_rate": 6.401619199074744e-06, + "loss": 0.3067, + "step": 1476, + "teacher_loss": 0.2964468002319336 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.45031142234802246, + "learning_rate": 6.405956339453521e-06, + "loss": 0.2675, + "step": 1477, + "teacher_loss": 0.2471812218427658 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.45350366830825806, + "learning_rate": 6.410293479832297e-06, + "loss": 0.2928, + "step": 1478, + "teacher_loss": 0.27496200799942017 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.452339768409729, + "learning_rate": 6.414630620211075e-06, + "loss": 0.2043, + "step": 1479, + "teacher_loss": 0.17673403024673462 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.37310177087783813, + "learning_rate": 6.4189677605898515e-06, + "loss": 0.24, + "step": 1480, + "teacher_loss": 0.2251579463481903 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.6626706123352051, + "learning_rate": 6.423304900968628e-06, + "loss": 0.3455, + "step": 1481, + "teacher_loss": 0.31028690934181213 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.3392796814441681, + "learning_rate": 6.427642041347405e-06, + "loss": 0.2424, + "step": 1482, + "teacher_loss": 0.23168599605560303 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.3476063311100006, + "learning_rate": 6.431979181726182e-06, + "loss": 0.2076, + "step": 1483, + "teacher_loss": 0.1920507401227951 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.3485298454761505, + "learning_rate": 6.436316322104959e-06, + "loss": 0.2023, + "step": 1484, + "teacher_loss": 0.18600186705589294 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.3874940872192383, + "learning_rate": 6.440653462483736e-06, + "loss": 0.2809, + "step": 1485, + "teacher_loss": 0.2690887153148651 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.1845911592245102, + "learning_rate": 6.444990602862512e-06, + "loss": 0.1195, + "step": 1486, + "teacher_loss": 0.11229465901851654 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.4796159863471985, + "learning_rate": 6.44932774324129e-06, + "loss": 0.2279, + "step": 1487, + "teacher_loss": 0.19996792078018188 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.5086467266082764, + "learning_rate": 6.453664883620067e-06, + "loss": 0.32, + "step": 1488, + "teacher_loss": 0.29902976751327515 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.4942467212677002, + "learning_rate": 6.458002023998843e-06, + "loss": 0.2338, + "step": 1489, + "teacher_loss": 0.2048470824956894 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.34948718547821045, + "learning_rate": 6.462339164377621e-06, + "loss": 0.2153, + "step": 1490, + "teacher_loss": 0.20037628710269928 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.5074208974838257, + "learning_rate": 6.466676304756397e-06, + "loss": 0.2884, + "step": 1491, + "teacher_loss": 0.26407018303871155 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.9427697658538818, + "learning_rate": 6.4710134451351745e-06, + "loss": 0.2791, + "step": 1492, + "teacher_loss": 0.20534226298332214 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.438088983297348, + "learning_rate": 6.475350585513951e-06, + "loss": 0.2172, + "step": 1493, + "teacher_loss": 0.19268149137496948 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.4645646810531616, + "learning_rate": 6.479687725892728e-06, + "loss": 0.2872, + "step": 1494, + "teacher_loss": 0.2675420939922333 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 1.3653159141540527, + "learning_rate": 6.484024866271505e-06, + "loss": 0.4513, + "step": 1495, + "teacher_loss": 0.3497464060783386 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.7653203010559082, + "learning_rate": 6.4883620066502825e-06, + "loss": 0.2472, + "step": 1496, + "teacher_loss": 0.18967586755752563 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.21197505295276642, + "learning_rate": 6.492699147029058e-06, + "loss": 0.2276, + "step": 1497, + "teacher_loss": 0.2293054312467575 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.26606351137161255, + "learning_rate": 6.497036287407836e-06, + "loss": 0.1999, + "step": 1498, + "teacher_loss": 0.19250395894050598 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.5619463920593262, + "learning_rate": 6.501373427786614e-06, + "loss": 0.2635, + "step": 1499, + "teacher_loss": 0.2303701937198639 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.566064178943634, + "learning_rate": 6.5057105681653896e-06, + "loss": 0.4351, + "step": 1500, + "teacher_loss": 0.4205273687839508 + }, + { + "epoch": 0.27, + "eval_exact_match": 79.5837275307474, + "eval_f1": 87.06577693357859, + "step": 1500 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.6536146402359009, + "learning_rate": 6.510047708544167e-06, + "loss": 0.3145, + "step": 1501, + "teacher_loss": 0.27684885263442993 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.6563832759857178, + "learning_rate": 6.514384848922943e-06, + "loss": 0.3079, + "step": 1502, + "teacher_loss": 0.2692255973815918 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.5887653827667236, + "learning_rate": 6.518721989301721e-06, + "loss": 0.3174, + "step": 1503, + "teacher_loss": 0.28729528188705444 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.31192320585250854, + "learning_rate": 6.5230591296804975e-06, + "loss": 0.196, + "step": 1504, + "teacher_loss": 0.18310143053531647 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.4568164050579071, + "learning_rate": 6.527396270059274e-06, + "loss": 0.2577, + "step": 1505, + "teacher_loss": 0.2356141209602356 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.853651225566864, + "learning_rate": 6.531733410438051e-06, + "loss": 0.3065, + "step": 1506, + "teacher_loss": 0.24565467238426208 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.19764959812164307, + "learning_rate": 6.536070550816829e-06, + "loss": 0.1998, + "step": 1507, + "teacher_loss": 0.20005784928798676 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.3866952657699585, + "learning_rate": 6.540407691195605e-06, + "loss": 0.239, + "step": 1508, + "teacher_loss": 0.22258563339710236 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.3077249228954315, + "learning_rate": 6.544744831574382e-06, + "loss": 0.3707, + "step": 1509, + "teacher_loss": 0.3777162432670593 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.7031110525131226, + "learning_rate": 6.54908197195316e-06, + "loss": 0.2954, + "step": 1510, + "teacher_loss": 0.25006282329559326 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.469660222530365, + "learning_rate": 6.553419112331936e-06, + "loss": 0.2713, + "step": 1511, + "teacher_loss": 0.24926158785820007 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.44997936487197876, + "learning_rate": 6.557756252710713e-06, + "loss": 0.2722, + "step": 1512, + "teacher_loss": 0.2523934245109558 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.1950952410697937, + "learning_rate": 6.562093393089489e-06, + "loss": 0.204, + "step": 1513, + "teacher_loss": 0.20494115352630615 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.4545411467552185, + "learning_rate": 6.566430533468267e-06, + "loss": 0.2405, + "step": 1514, + "teacher_loss": 0.2167295664548874 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.2769771218299866, + "learning_rate": 6.570767673847044e-06, + "loss": 0.2203, + "step": 1515, + "teacher_loss": 0.21395191550254822 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.6572542190551758, + "learning_rate": 6.5751048142258205e-06, + "loss": 0.3388, + "step": 1516, + "teacher_loss": 0.30342453718185425 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.20394620299339294, + "learning_rate": 6.579441954604597e-06, + "loss": 0.2197, + "step": 1517, + "teacher_loss": 0.22148063778877258 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.5273770689964294, + "learning_rate": 6.583779094983375e-06, + "loss": 0.2951, + "step": 1518, + "teacher_loss": 0.26924821734428406 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.18105781078338623, + "learning_rate": 6.588116235362151e-06, + "loss": 0.1567, + "step": 1519, + "teacher_loss": 0.15399745106697083 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.37027502059936523, + "learning_rate": 6.5924533757409285e-06, + "loss": 0.2491, + "step": 1520, + "teacher_loss": 0.23565515875816345 + }, + { + "compression_loss": 0.0, + "epoch": 0.27, + "label_loss": 0.5778634548187256, + "learning_rate": 6.596790516119705e-06, + "loss": 0.2677, + "step": 1521, + "teacher_loss": 0.23321810364723206 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.48173433542251587, + "learning_rate": 6.601127656498482e-06, + "loss": 0.3414, + "step": 1522, + "teacher_loss": 0.3257637619972229 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.28270217776298523, + "learning_rate": 6.60546479687726e-06, + "loss": 0.2089, + "step": 1523, + "teacher_loss": 0.20072519779205322 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.7699395418167114, + "learning_rate": 6.6098019372560356e-06, + "loss": 0.6166, + "step": 1524, + "teacher_loss": 0.5995362997055054 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.354941189289093, + "learning_rate": 6.614139077634813e-06, + "loss": 0.2355, + "step": 1525, + "teacher_loss": 0.222259059548378 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.36794978380203247, + "learning_rate": 6.61847621801359e-06, + "loss": 0.2587, + "step": 1526, + "teacher_loss": 0.24660485982894897 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.2065725028514862, + "learning_rate": 6.622813358392367e-06, + "loss": 0.2204, + "step": 1527, + "teacher_loss": 0.22196698188781738 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.4842984080314636, + "learning_rate": 6.6271504987711435e-06, + "loss": 0.2904, + "step": 1528, + "teacher_loss": 0.2688485383987427 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.4599143862724304, + "learning_rate": 6.631487639149921e-06, + "loss": 0.2323, + "step": 1529, + "teacher_loss": 0.20698490738868713 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.3056512475013733, + "learning_rate": 6.635824779528697e-06, + "loss": 0.1645, + "step": 1530, + "teacher_loss": 0.14879915118217468 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.14455418288707733, + "learning_rate": 6.640161919907475e-06, + "loss": 0.2401, + "step": 1531, + "teacher_loss": 0.2507137656211853 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.4103032350540161, + "learning_rate": 6.6444990602862515e-06, + "loss": 0.2954, + "step": 1532, + "teacher_loss": 0.28265517950057983 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.14162828028202057, + "learning_rate": 6.648836200665028e-06, + "loss": 0.1634, + "step": 1533, + "teacher_loss": 0.16582491993904114 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.44356968998908997, + "learning_rate": 6.653173341043806e-06, + "loss": 0.3179, + "step": 1534, + "teacher_loss": 0.30391383171081543 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 1.1816470623016357, + "learning_rate": 6.657510481422582e-06, + "loss": 0.3503, + "step": 1535, + "teacher_loss": 0.25797995924949646 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.4879845380783081, + "learning_rate": 6.661847621801359e-06, + "loss": 0.2168, + "step": 1536, + "teacher_loss": 0.18672238290309906 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.4371688961982727, + "learning_rate": 6.666184762180136e-06, + "loss": 0.2967, + "step": 1537, + "teacher_loss": 0.2811279296875 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.5191504955291748, + "learning_rate": 6.670521902558913e-06, + "loss": 0.3176, + "step": 1538, + "teacher_loss": 0.29522716999053955 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.4230346977710724, + "learning_rate": 6.67485904293769e-06, + "loss": 0.2532, + "step": 1539, + "teacher_loss": 0.2343805879354477 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.3335103690624237, + "learning_rate": 6.6791961833164665e-06, + "loss": 0.1938, + "step": 1540, + "teacher_loss": 0.1782270222902298 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.5602671504020691, + "learning_rate": 6.683533323695243e-06, + "loss": 0.2814, + "step": 1541, + "teacher_loss": 0.25040560960769653 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.2900892198085785, + "learning_rate": 6.687870464074021e-06, + "loss": 0.2469, + "step": 1542, + "teacher_loss": 0.24214991927146912 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.3640191853046417, + "learning_rate": 6.692207604452798e-06, + "loss": 0.3305, + "step": 1543, + "teacher_loss": 0.3268120288848877 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.28090301156044006, + "learning_rate": 6.6965447448315744e-06, + "loss": 0.2424, + "step": 1544, + "teacher_loss": 0.2381608486175537 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.3154433071613312, + "learning_rate": 6.700881885210352e-06, + "loss": 0.3793, + "step": 1545, + "teacher_loss": 0.3864471912384033 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.6108459234237671, + "learning_rate": 6.705219025589128e-06, + "loss": 0.3181, + "step": 1546, + "teacher_loss": 0.2855387330055237 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.4527820944786072, + "learning_rate": 6.709556165967906e-06, + "loss": 0.24, + "step": 1547, + "teacher_loss": 0.21638791263103485 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.3557090163230896, + "learning_rate": 6.713893306346682e-06, + "loss": 0.215, + "step": 1548, + "teacher_loss": 0.19931426644325256 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.18618391454219818, + "learning_rate": 6.718230446725459e-06, + "loss": 0.1967, + "step": 1549, + "teacher_loss": 0.1978602111339569 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.2845844030380249, + "learning_rate": 6.722567587104236e-06, + "loss": 0.3817, + "step": 1550, + "teacher_loss": 0.3925043046474457 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.4687957167625427, + "learning_rate": 6.726904727483013e-06, + "loss": 0.2492, + "step": 1551, + "teacher_loss": 0.22483769059181213 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.4031181037425995, + "learning_rate": 6.7312418678617895e-06, + "loss": 0.2792, + "step": 1552, + "teacher_loss": 0.2654242217540741 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.635313868522644, + "learning_rate": 6.735579008240567e-06, + "loss": 0.2866, + "step": 1553, + "teacher_loss": 0.24783284962177277 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.5238542556762695, + "learning_rate": 6.739916148619344e-06, + "loss": 0.252, + "step": 1554, + "teacher_loss": 0.2218276858329773 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.4486095905303955, + "learning_rate": 6.744253288998121e-06, + "loss": 0.2399, + "step": 1555, + "teacher_loss": 0.21673499047756195 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.48756474256515503, + "learning_rate": 6.748590429376898e-06, + "loss": 0.375, + "step": 1556, + "teacher_loss": 0.36249426007270813 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.10583238303661346, + "learning_rate": 6.752927569755674e-06, + "loss": 0.1572, + "step": 1557, + "teacher_loss": 0.16290950775146484 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.5338687300682068, + "learning_rate": 6.757264710134452e-06, + "loss": 0.2741, + "step": 1558, + "teacher_loss": 0.24521172046661377 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.7148044109344482, + "learning_rate": 6.761601850513229e-06, + "loss": 0.3346, + "step": 1559, + "teacher_loss": 0.2923468351364136 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.3636273741722107, + "learning_rate": 6.765938990892005e-06, + "loss": 0.2747, + "step": 1560, + "teacher_loss": 0.2648392915725708 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.4297388792037964, + "learning_rate": 6.770276131270782e-06, + "loss": 0.2728, + "step": 1561, + "teacher_loss": 0.2554115653038025 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 1.492283582687378, + "learning_rate": 6.774613271649559e-06, + "loss": 0.6542, + "step": 1562, + "teacher_loss": 0.5610284209251404 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.47709864377975464, + "learning_rate": 6.778950412028336e-06, + "loss": 0.251, + "step": 1563, + "teacher_loss": 0.2258908897638321 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.24776878952980042, + "learning_rate": 6.783287552407113e-06, + "loss": 0.193, + "step": 1564, + "teacher_loss": 0.18690750002861023 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.7163398265838623, + "learning_rate": 6.78762469278589e-06, + "loss": 0.3204, + "step": 1565, + "teacher_loss": 0.27636831998825073 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.2928646206855774, + "learning_rate": 6.791961833164667e-06, + "loss": 0.1992, + "step": 1566, + "teacher_loss": 0.18874001502990723 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.3436906933784485, + "learning_rate": 6.7962989735434445e-06, + "loss": 0.2255, + "step": 1567, + "teacher_loss": 0.21235454082489014 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.2857658863067627, + "learning_rate": 6.8006361139222204e-06, + "loss": 0.2658, + "step": 1568, + "teacher_loss": 0.2636314928531647 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.30223679542541504, + "learning_rate": 6.804973254300998e-06, + "loss": 0.2035, + "step": 1569, + "teacher_loss": 0.19252745807170868 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.2982024550437927, + "learning_rate": 6.809310394679774e-06, + "loss": 0.2603, + "step": 1570, + "teacher_loss": 0.25607356429100037 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.5866947770118713, + "learning_rate": 6.813647535058552e-06, + "loss": 0.2633, + "step": 1571, + "teacher_loss": 0.2273411899805069 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.4968215823173523, + "learning_rate": 6.817984675437328e-06, + "loss": 0.2707, + "step": 1572, + "teacher_loss": 0.24559777975082397 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.6647064089775085, + "learning_rate": 6.822321815816105e-06, + "loss": 0.4016, + "step": 1573, + "teacher_loss": 0.3723995089530945 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.686487078666687, + "learning_rate": 6.826658956194882e-06, + "loss": 0.284, + "step": 1574, + "teacher_loss": 0.23932772874832153 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.8935974836349487, + "learning_rate": 6.8309960965736596e-06, + "loss": 0.2883, + "step": 1575, + "teacher_loss": 0.2210836112499237 + }, + { + "compression_loss": 0.0, + "epoch": 0.28, + "label_loss": 0.4665301740169525, + "learning_rate": 6.835333236952436e-06, + "loss": 0.3239, + "step": 1576, + "teacher_loss": 0.3080606162548065 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.19298695027828217, + "learning_rate": 6.839670377331213e-06, + "loss": 0.1892, + "step": 1577, + "teacher_loss": 0.18879903852939606 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.8310211300849915, + "learning_rate": 6.844007517709991e-06, + "loss": 0.2559, + "step": 1578, + "teacher_loss": 0.19204741716384888 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.6331437230110168, + "learning_rate": 6.848344658088767e-06, + "loss": 0.2812, + "step": 1579, + "teacher_loss": 0.24213330447673798 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.6423879265785217, + "learning_rate": 6.852681798467544e-06, + "loss": 0.303, + "step": 1580, + "teacher_loss": 0.2652893364429474 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.4978925585746765, + "learning_rate": 6.85701893884632e-06, + "loss": 0.3412, + "step": 1581, + "teacher_loss": 0.32374048233032227 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.697215735912323, + "learning_rate": 6.861356079225098e-06, + "loss": 0.3246, + "step": 1582, + "teacher_loss": 0.2831571698188782 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.543228030204773, + "learning_rate": 6.865693219603875e-06, + "loss": 0.2785, + "step": 1583, + "teacher_loss": 0.24907484650611877 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.3739100694656372, + "learning_rate": 6.870030359982651e-06, + "loss": 0.3501, + "step": 1584, + "teacher_loss": 0.34750163555145264 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.5138339996337891, + "learning_rate": 6.874367500361428e-06, + "loss": 0.2365, + "step": 1585, + "teacher_loss": 0.20568975806236267 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.26184117794036865, + "learning_rate": 6.878704640740206e-06, + "loss": 0.2669, + "step": 1586, + "teacher_loss": 0.2674316465854645 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.6729931235313416, + "learning_rate": 6.8830417811189826e-06, + "loss": 0.2537, + "step": 1587, + "teacher_loss": 0.20705857872962952 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.4510330259799957, + "learning_rate": 6.887378921497759e-06, + "loss": 0.2922, + "step": 1588, + "teacher_loss": 0.2745053768157959 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.6315981149673462, + "learning_rate": 6.891716061876536e-06, + "loss": 0.3575, + "step": 1589, + "teacher_loss": 0.32701215147972107 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.2609737813472748, + "learning_rate": 6.896053202255313e-06, + "loss": 0.2111, + "step": 1590, + "teacher_loss": 0.2055622637271881 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.7260409593582153, + "learning_rate": 6.9003903426340905e-06, + "loss": 0.2536, + "step": 1591, + "teacher_loss": 0.20115147531032562 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.6837571859359741, + "learning_rate": 6.9047274830128664e-06, + "loss": 0.3027, + "step": 1592, + "teacher_loss": 0.26033419370651245 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.3227293789386749, + "learning_rate": 6.909064623391644e-06, + "loss": 0.1969, + "step": 1593, + "teacher_loss": 0.1829744279384613 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.6455123424530029, + "learning_rate": 6.913401763770421e-06, + "loss": 0.283, + "step": 1594, + "teacher_loss": 0.24272695183753967 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.6461910009384155, + "learning_rate": 6.917738904149198e-06, + "loss": 0.2789, + "step": 1595, + "teacher_loss": 0.23814070224761963 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.2895563244819641, + "learning_rate": 6.922076044527974e-06, + "loss": 0.2847, + "step": 1596, + "teacher_loss": 0.28418421745300293 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.22185850143432617, + "learning_rate": 6.926413184906752e-06, + "loss": 0.2692, + "step": 1597, + "teacher_loss": 0.27450883388519287 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.4758252501487732, + "learning_rate": 6.930750325285529e-06, + "loss": 0.226, + "step": 1598, + "teacher_loss": 0.19828465580940247 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.1545831710100174, + "learning_rate": 6.9350874656643055e-06, + "loss": 0.2554, + "step": 1599, + "teacher_loss": 0.26658469438552856 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.27799665927886963, + "learning_rate": 6.939424606043082e-06, + "loss": 0.2782, + "step": 1600, + "teacher_loss": 0.27825433015823364 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.3682354688644409, + "learning_rate": 6.943761746421859e-06, + "loss": 0.2725, + "step": 1601, + "teacher_loss": 0.26190778613090515 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.36152321100234985, + "learning_rate": 6.948098886800637e-06, + "loss": 0.2341, + "step": 1602, + "teacher_loss": 0.21999385952949524 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.23636780679225922, + "learning_rate": 6.952436027179413e-06, + "loss": 0.193, + "step": 1603, + "teacher_loss": 0.18817946314811707 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.24163982272148132, + "learning_rate": 6.95677316755819e-06, + "loss": 0.226, + "step": 1604, + "teacher_loss": 0.22429007291793823 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.18795496225357056, + "learning_rate": 6.961110307936967e-06, + "loss": 0.1735, + "step": 1605, + "teacher_loss": 0.17191702127456665 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.3355136513710022, + "learning_rate": 6.965447448315744e-06, + "loss": 0.2183, + "step": 1606, + "teacher_loss": 0.20526964962482452 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.9709417819976807, + "learning_rate": 6.969784588694521e-06, + "loss": 0.3519, + "step": 1607, + "teacher_loss": 0.28310951590538025 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.6504513621330261, + "learning_rate": 6.974121729073298e-06, + "loss": 0.3362, + "step": 1608, + "teacher_loss": 0.3012961745262146 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.30171966552734375, + "learning_rate": 6.978458869452075e-06, + "loss": 0.232, + "step": 1609, + "teacher_loss": 0.2242894470691681 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.41263431310653687, + "learning_rate": 6.982796009830852e-06, + "loss": 0.1743, + "step": 1610, + "teacher_loss": 0.1478133201599121 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.7839899063110352, + "learning_rate": 6.9871331502096285e-06, + "loss": 0.2454, + "step": 1611, + "teacher_loss": 0.18555572628974915 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.5709105730056763, + "learning_rate": 6.991470290588405e-06, + "loss": 0.2441, + "step": 1612, + "teacher_loss": 0.20782436430454254 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.5652528405189514, + "learning_rate": 6.995807430967183e-06, + "loss": 0.3085, + "step": 1613, + "teacher_loss": 0.27997809648513794 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.4097408652305603, + "learning_rate": 7.000144571345959e-06, + "loss": 0.2704, + "step": 1614, + "teacher_loss": 0.2549644708633423 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.20464831590652466, + "learning_rate": 7.0044817117247365e-06, + "loss": 0.1807, + "step": 1615, + "teacher_loss": 0.1780604124069214 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.42826512455940247, + "learning_rate": 7.008818852103513e-06, + "loss": 0.2685, + "step": 1616, + "teacher_loss": 0.2507718801498413 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.42512720823287964, + "learning_rate": 7.01315599248229e-06, + "loss": 0.194, + "step": 1617, + "teacher_loss": 0.16826513409614563 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.6167625784873962, + "learning_rate": 7.017493132861068e-06, + "loss": 0.2766, + "step": 1618, + "teacher_loss": 0.23885196447372437 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.8156715631484985, + "learning_rate": 7.021830273239844e-06, + "loss": 0.2435, + "step": 1619, + "teacher_loss": 0.17993509769439697 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.7376936078071594, + "learning_rate": 7.026167413618621e-06, + "loss": 0.2779, + "step": 1620, + "teacher_loss": 0.22675897181034088 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.9738141298294067, + "learning_rate": 7.030504553997398e-06, + "loss": 0.7924, + "step": 1621, + "teacher_loss": 0.7722911238670349 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.9232257604598999, + "learning_rate": 7.034841694376175e-06, + "loss": 0.2871, + "step": 1622, + "teacher_loss": 0.21642255783081055 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.18170255422592163, + "learning_rate": 7.0391788347549515e-06, + "loss": 0.1837, + "step": 1623, + "teacher_loss": 0.18388764560222626 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.6453879475593567, + "learning_rate": 7.043515975133729e-06, + "loss": 0.4657, + "step": 1624, + "teacher_loss": 0.4457581639289856 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.6297687292098999, + "learning_rate": 7.047853115512505e-06, + "loss": 0.2294, + "step": 1625, + "teacher_loss": 0.18488456308841705 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.44334226846694946, + "learning_rate": 7.052190255891283e-06, + "loss": 0.169, + "step": 1626, + "teacher_loss": 0.13852104544639587 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.654925525188446, + "learning_rate": 7.0565273962700595e-06, + "loss": 0.2909, + "step": 1627, + "teacher_loss": 0.2504042088985443 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.42963486909866333, + "learning_rate": 7.060864536648836e-06, + "loss": 0.2075, + "step": 1628, + "teacher_loss": 0.18282747268676758 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.5620638728141785, + "learning_rate": 7.065201677027614e-06, + "loss": 0.3048, + "step": 1629, + "teacher_loss": 0.2761821746826172 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.6134941577911377, + "learning_rate": 7.06953881740639e-06, + "loss": 0.5665, + "step": 1630, + "teacher_loss": 0.5612772107124329 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.5002784729003906, + "learning_rate": 7.0738759577851674e-06, + "loss": 0.262, + "step": 1631, + "teacher_loss": 0.23550619184970856 + }, + { + "compression_loss": 0.0, + "epoch": 0.29, + "label_loss": 0.45943838357925415, + "learning_rate": 7.078213098163944e-06, + "loss": 0.242, + "step": 1632, + "teacher_loss": 0.21784795820713043 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.8223768472671509, + "learning_rate": 7.082550238542721e-06, + "loss": 0.3204, + "step": 1633, + "teacher_loss": 0.26466691493988037 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.304261177778244, + "learning_rate": 7.086887378921498e-06, + "loss": 0.2178, + "step": 1634, + "teacher_loss": 0.208164781332016 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.44871509075164795, + "learning_rate": 7.091224519300275e-06, + "loss": 0.5954, + "step": 1635, + "teacher_loss": 0.6117015480995178 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.5155469179153442, + "learning_rate": 7.095561659679051e-06, + "loss": 0.2616, + "step": 1636, + "teacher_loss": 0.23342445492744446 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.5483478307723999, + "learning_rate": 7.099898800057829e-06, + "loss": 0.2518, + "step": 1637, + "teacher_loss": 0.21886010468006134 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.29973161220550537, + "learning_rate": 7.104235940436606e-06, + "loss": 0.2221, + "step": 1638, + "teacher_loss": 0.21342161297798157 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.3664548993110657, + "learning_rate": 7.1085730808153825e-06, + "loss": 0.2595, + "step": 1639, + "teacher_loss": 0.24763160943984985 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.4747645854949951, + "learning_rate": 7.11291022119416e-06, + "loss": 0.2376, + "step": 1640, + "teacher_loss": 0.21121986210346222 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.8306114673614502, + "learning_rate": 7.117247361572936e-06, + "loss": 0.2725, + "step": 1641, + "teacher_loss": 0.21049359440803528 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.46779969334602356, + "learning_rate": 7.121584501951714e-06, + "loss": 0.2104, + "step": 1642, + "teacher_loss": 0.18180415034294128 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.23728707432746887, + "learning_rate": 7.1259216423304904e-06, + "loss": 0.1373, + "step": 1643, + "teacher_loss": 0.12619513273239136 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.8424392342567444, + "learning_rate": 7.130258782709267e-06, + "loss": 0.312, + "step": 1644, + "teacher_loss": 0.25305604934692383 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.34003889560699463, + "learning_rate": 7.134595923088044e-06, + "loss": 0.2074, + "step": 1645, + "teacher_loss": 0.19260868430137634 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.686252772808075, + "learning_rate": 7.138933063466822e-06, + "loss": 0.3789, + "step": 1646, + "teacher_loss": 0.3447898328304291 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.45493486523628235, + "learning_rate": 7.1432702038455975e-06, + "loss": 0.1873, + "step": 1647, + "teacher_loss": 0.15761220455169678 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.6702474355697632, + "learning_rate": 7.147607344224375e-06, + "loss": 0.3254, + "step": 1648, + "teacher_loss": 0.2871015667915344 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.47037121653556824, + "learning_rate": 7.151944484603151e-06, + "loss": 0.1999, + "step": 1649, + "teacher_loss": 0.16986367106437683 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.24249157309532166, + "learning_rate": 7.156281624981929e-06, + "loss": 0.2699, + "step": 1650, + "teacher_loss": 0.272937536239624 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 1.0796585083007812, + "learning_rate": 7.160618765360706e-06, + "loss": 0.3372, + "step": 1651, + "teacher_loss": 0.25466597080230713 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.5444917678833008, + "learning_rate": 7.164955905739482e-06, + "loss": 0.1849, + "step": 1652, + "teacher_loss": 0.14489763975143433 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.45087337493896484, + "learning_rate": 7.16929304611826e-06, + "loss": 0.2942, + "step": 1653, + "teacher_loss": 0.27682676911354065 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.822691023349762, + "learning_rate": 7.173630186497037e-06, + "loss": 0.3237, + "step": 1654, + "teacher_loss": 0.26830005645751953 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.5252161026000977, + "learning_rate": 7.177967326875813e-06, + "loss": 0.2053, + "step": 1655, + "teacher_loss": 0.16980135440826416 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.18071994185447693, + "learning_rate": 7.18230446725459e-06, + "loss": 0.2537, + "step": 1656, + "teacher_loss": 0.2617645263671875 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 1.085675835609436, + "learning_rate": 7.186641607633368e-06, + "loss": 0.3519, + "step": 1657, + "teacher_loss": 0.2703893184661865 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.39283737540245056, + "learning_rate": 7.190978748012144e-06, + "loss": 0.2596, + "step": 1658, + "teacher_loss": 0.24481524527072906 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.5701018571853638, + "learning_rate": 7.195315888390921e-06, + "loss": 0.2201, + "step": 1659, + "teacher_loss": 0.18121731281280518 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.24561578035354614, + "learning_rate": 7.199653028769697e-06, + "loss": 0.3179, + "step": 1660, + "teacher_loss": 0.3259660601615906 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.26406487822532654, + "learning_rate": 7.203990169148475e-06, + "loss": 0.2402, + "step": 1661, + "teacher_loss": 0.23756316304206848 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.4176293611526489, + "learning_rate": 7.2083273095272525e-06, + "loss": 0.2505, + "step": 1662, + "teacher_loss": 0.2318916618824005 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.3857450485229492, + "learning_rate": 7.2126644499060285e-06, + "loss": 0.2479, + "step": 1663, + "teacher_loss": 0.2325943112373352 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.2657570242881775, + "learning_rate": 7.217001590284806e-06, + "loss": 0.3175, + "step": 1664, + "teacher_loss": 0.32323789596557617 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.5581217408180237, + "learning_rate": 7.221338730663583e-06, + "loss": 0.2276, + "step": 1665, + "teacher_loss": 0.19089959561824799 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.49160897731781006, + "learning_rate": 7.22567587104236e-06, + "loss": 0.3407, + "step": 1666, + "teacher_loss": 0.323901891708374 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.5474512577056885, + "learning_rate": 7.230013011421136e-06, + "loss": 0.2225, + "step": 1667, + "teacher_loss": 0.1863606721162796 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.2993767559528351, + "learning_rate": 7.234350151799913e-06, + "loss": 0.1807, + "step": 1668, + "teacher_loss": 0.1674700826406479 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.37867045402526855, + "learning_rate": 7.23868729217869e-06, + "loss": 0.221, + "step": 1669, + "teacher_loss": 0.20344525575637817 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.31751367449760437, + "learning_rate": 7.243024432557468e-06, + "loss": 0.1783, + "step": 1670, + "teacher_loss": 0.16278833150863647 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.5946303606033325, + "learning_rate": 7.2473615729362435e-06, + "loss": 0.264, + "step": 1671, + "teacher_loss": 0.22730335593223572 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.5039011240005493, + "learning_rate": 7.251698713315021e-06, + "loss": 0.2745, + "step": 1672, + "teacher_loss": 0.24898861348628998 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.8543486595153809, + "learning_rate": 7.256035853693799e-06, + "loss": 0.3221, + "step": 1673, + "teacher_loss": 0.26295357942581177 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.1504620611667633, + "learning_rate": 7.260372994072575e-06, + "loss": 0.1629, + "step": 1674, + "teacher_loss": 0.16426241397857666 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.4494657516479492, + "learning_rate": 7.264710134451352e-06, + "loss": 0.2516, + "step": 1675, + "teacher_loss": 0.22959056496620178 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.461913526058197, + "learning_rate": 7.269047274830129e-06, + "loss": 0.2904, + "step": 1676, + "teacher_loss": 0.2713763117790222 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.2915355861186981, + "learning_rate": 7.273384415208906e-06, + "loss": 0.2472, + "step": 1677, + "teacher_loss": 0.24229495227336884 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.5916110277175903, + "learning_rate": 7.277721555587683e-06, + "loss": 0.2434, + "step": 1678, + "teacher_loss": 0.20476101338863373 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.3630271255970001, + "learning_rate": 7.282058695966459e-06, + "loss": 0.2834, + "step": 1679, + "teacher_loss": 0.27460354566574097 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.4084319770336151, + "learning_rate": 7.286395836345236e-06, + "loss": 0.2289, + "step": 1680, + "teacher_loss": 0.2089563012123108 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.11544451862573624, + "learning_rate": 7.290732976724014e-06, + "loss": 0.1416, + "step": 1681, + "teacher_loss": 0.14456146955490112 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.9449407458305359, + "learning_rate": 7.29507011710279e-06, + "loss": 0.3621, + "step": 1682, + "teacher_loss": 0.29732680320739746 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.33266693353652954, + "learning_rate": 7.299407257481567e-06, + "loss": 0.2549, + "step": 1683, + "teacher_loss": 0.24624398350715637 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.22451072931289673, + "learning_rate": 7.303744397860345e-06, + "loss": 0.1855, + "step": 1684, + "teacher_loss": 0.1812075823545456 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.6619007587432861, + "learning_rate": 7.308081538239121e-06, + "loss": 0.456, + "step": 1685, + "teacher_loss": 0.43316125869750977 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.30011308193206787, + "learning_rate": 7.3124186786178985e-06, + "loss": 0.1897, + "step": 1686, + "teacher_loss": 0.17746882140636444 + }, + { + "compression_loss": 0.0, + "epoch": 0.3, + "label_loss": 0.1647869199514389, + "learning_rate": 7.316755818996675e-06, + "loss": 0.2197, + "step": 1687, + "teacher_loss": 0.22582116723060608 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.4102597236633301, + "learning_rate": 7.321092959375452e-06, + "loss": 0.2487, + "step": 1688, + "teacher_loss": 0.23072564601898193 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.4120871424674988, + "learning_rate": 7.325430099754229e-06, + "loss": 0.2338, + "step": 1689, + "teacher_loss": 0.21399688720703125 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 1.1790788173675537, + "learning_rate": 7.329767240133006e-06, + "loss": 0.3521, + "step": 1690, + "teacher_loss": 0.26023560762405396 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.41928374767303467, + "learning_rate": 7.334104380511782e-06, + "loss": 0.1827, + "step": 1691, + "teacher_loss": 0.15641218423843384 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.22762858867645264, + "learning_rate": 7.33844152089056e-06, + "loss": 0.1659, + "step": 1692, + "teacher_loss": 0.15905120968818665 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.2536356747150421, + "learning_rate": 7.342778661269336e-06, + "loss": 0.2055, + "step": 1693, + "teacher_loss": 0.20010483264923096 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.3064706325531006, + "learning_rate": 7.3471158016481136e-06, + "loss": 0.206, + "step": 1694, + "teacher_loss": 0.1948208063840866 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.5034622550010681, + "learning_rate": 7.351452942026891e-06, + "loss": 0.2401, + "step": 1695, + "teacher_loss": 0.21083992719650269 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.38133305311203003, + "learning_rate": 7.355790082405667e-06, + "loss": 0.1823, + "step": 1696, + "teacher_loss": 0.16017015278339386 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.5878756642341614, + "learning_rate": 7.360127222784445e-06, + "loss": 0.2908, + "step": 1697, + "teacher_loss": 0.25783663988113403 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.5000163316726685, + "learning_rate": 7.364464363163221e-06, + "loss": 0.2432, + "step": 1698, + "teacher_loss": 0.2146349400281906 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.15711775422096252, + "learning_rate": 7.368801503541998e-06, + "loss": 0.2729, + "step": 1699, + "teacher_loss": 0.285740464925766 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.2974385619163513, + "learning_rate": 7.373138643920775e-06, + "loss": 0.2114, + "step": 1700, + "teacher_loss": 0.20181187987327576 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.48172444105148315, + "learning_rate": 7.377475784299552e-06, + "loss": 0.3203, + "step": 1701, + "teacher_loss": 0.30236685276031494 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.5313643217086792, + "learning_rate": 7.381812924678329e-06, + "loss": 0.2297, + "step": 1702, + "teacher_loss": 0.19612954556941986 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.33326053619384766, + "learning_rate": 7.386150065057106e-06, + "loss": 0.2524, + "step": 1703, + "teacher_loss": 0.24336345493793488 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.548973023891449, + "learning_rate": 7.390487205435882e-06, + "loss": 0.2708, + "step": 1704, + "teacher_loss": 0.23993246257305145 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.8691943883895874, + "learning_rate": 7.39482434581466e-06, + "loss": 0.2918, + "step": 1705, + "teacher_loss": 0.22760280966758728 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.3747527599334717, + "learning_rate": 7.399161486193437e-06, + "loss": 0.177, + "step": 1706, + "teacher_loss": 0.1550162136554718 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.4190804362297058, + "learning_rate": 7.403498626572213e-06, + "loss": 0.2564, + "step": 1707, + "teacher_loss": 0.23831962049007416 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.3904881179332733, + "learning_rate": 7.407835766950991e-06, + "loss": 0.2544, + "step": 1708, + "teacher_loss": 0.239267498254776 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.8016515970230103, + "learning_rate": 7.412172907329767e-06, + "loss": 0.5098, + "step": 1709, + "teacher_loss": 0.4773510694503784 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.2168349027633667, + "learning_rate": 7.4165100477085445e-06, + "loss": 0.1438, + "step": 1710, + "teacher_loss": 0.13565593957901 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.6603478193283081, + "learning_rate": 7.420847188087321e-06, + "loss": 0.2855, + "step": 1711, + "teacher_loss": 0.24385464191436768 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.4019721746444702, + "learning_rate": 7.425184328466098e-06, + "loss": 0.1988, + "step": 1712, + "teacher_loss": 0.17621468007564545 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.44434458017349243, + "learning_rate": 7.429521468844875e-06, + "loss": 0.2366, + "step": 1713, + "teacher_loss": 0.2134873867034912 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.568021833896637, + "learning_rate": 7.4338586092236525e-06, + "loss": 0.2871, + "step": 1714, + "teacher_loss": 0.2558918297290802 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.19821667671203613, + "learning_rate": 7.438195749602428e-06, + "loss": 0.2059, + "step": 1715, + "teacher_loss": 0.20677441358566284 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.442771852016449, + "learning_rate": 7.442532889981206e-06, + "loss": 0.2496, + "step": 1716, + "teacher_loss": 0.2281550019979477 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.22311145067214966, + "learning_rate": 7.446870030359983e-06, + "loss": 0.2713, + "step": 1717, + "teacher_loss": 0.27667611837387085 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.3049648404121399, + "learning_rate": 7.4512071707387596e-06, + "loss": 0.2137, + "step": 1718, + "teacher_loss": 0.2035529762506485 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.5022507309913635, + "learning_rate": 7.455544311117537e-06, + "loss": 0.2142, + "step": 1719, + "teacher_loss": 0.18224143981933594 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.2592204213142395, + "learning_rate": 7.459881451496313e-06, + "loss": 0.2881, + "step": 1720, + "teacher_loss": 0.29136383533477783 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.2386520802974701, + "learning_rate": 7.464218591875091e-06, + "loss": 0.184, + "step": 1721, + "teacher_loss": 0.1779354214668274 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.6151055097579956, + "learning_rate": 7.4685557322538675e-06, + "loss": 0.2947, + "step": 1722, + "teacher_loss": 0.2590673565864563 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.7037807703018188, + "learning_rate": 7.472892872632644e-06, + "loss": 0.2442, + "step": 1723, + "teacher_loss": 0.19308754801750183 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.33382242918014526, + "learning_rate": 7.477230013011421e-06, + "loss": 0.1606, + "step": 1724, + "teacher_loss": 0.14139509201049805 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.8106041550636292, + "learning_rate": 7.481567153390199e-06, + "loss": 0.2856, + "step": 1725, + "teacher_loss": 0.22725045680999756 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.413419246673584, + "learning_rate": 7.485904293768975e-06, + "loss": 0.2293, + "step": 1726, + "teacher_loss": 0.20887622237205505 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.3871203064918518, + "learning_rate": 7.490241434147752e-06, + "loss": 0.2792, + "step": 1727, + "teacher_loss": 0.26722365617752075 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.32493478059768677, + "learning_rate": 7.494578574526529e-06, + "loss": 0.2301, + "step": 1728, + "teacher_loss": 0.2195383608341217 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.4425840377807617, + "learning_rate": 7.498915714905306e-06, + "loss": 0.2911, + "step": 1729, + "teacher_loss": 0.27426621317863464 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.544608473777771, + "learning_rate": 7.5032528552840826e-06, + "loss": 0.3097, + "step": 1730, + "teacher_loss": 0.28356924653053284 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.36008691787719727, + "learning_rate": 7.507589995662859e-06, + "loss": 0.2092, + "step": 1731, + "teacher_loss": 0.1923803687095642 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.4727802276611328, + "learning_rate": 7.511927136041637e-06, + "loss": 0.3376, + "step": 1732, + "teacher_loss": 0.32262080907821655 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.8734699487686157, + "learning_rate": 7.516264276420415e-06, + "loss": 0.3171, + "step": 1733, + "teacher_loss": 0.25531288981437683 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.37842100858688354, + "learning_rate": 7.52060141679919e-06, + "loss": 0.1875, + "step": 1734, + "teacher_loss": 0.16627779603004456 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.18552684783935547, + "learning_rate": 7.524938557177967e-06, + "loss": 0.2206, + "step": 1735, + "teacher_loss": 0.22451084852218628 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.4525231122970581, + "learning_rate": 7.529275697556745e-06, + "loss": 0.2288, + "step": 1736, + "teacher_loss": 0.2039850652217865 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.31749141216278076, + "learning_rate": 7.533612837935522e-06, + "loss": 0.2571, + "step": 1737, + "teacher_loss": 0.250375360250473 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.4055234491825104, + "learning_rate": 7.537949978314299e-06, + "loss": 0.2859, + "step": 1738, + "teacher_loss": 0.2725864350795746 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.3373218774795532, + "learning_rate": 7.542287118693074e-06, + "loss": 0.2262, + "step": 1739, + "teacher_loss": 0.21390564739704132 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.5933299660682678, + "learning_rate": 7.546624259071852e-06, + "loss": 0.3101, + "step": 1740, + "teacher_loss": 0.2786233425140381 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.25776606798171997, + "learning_rate": 7.55096139945063e-06, + "loss": 0.2206, + "step": 1741, + "teacher_loss": 0.216432124376297 + }, + { + "compression_loss": 0.0, + "epoch": 0.31, + "label_loss": 0.5019830465316772, + "learning_rate": 7.555298539829406e-06, + "loss": 0.2842, + "step": 1742, + "teacher_loss": 0.26005327701568604 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.2376137375831604, + "learning_rate": 7.559635680208182e-06, + "loss": 0.1845, + "step": 1743, + "teacher_loss": 0.17865246534347534 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.2858850955963135, + "learning_rate": 7.56397282058696e-06, + "loss": 0.2729, + "step": 1744, + "teacher_loss": 0.2714817523956299 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.3677661716938019, + "learning_rate": 7.568309960965737e-06, + "loss": 0.3363, + "step": 1745, + "teacher_loss": 0.3328477144241333 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.81312495470047, + "learning_rate": 7.572647101344514e-06, + "loss": 0.2042, + "step": 1746, + "teacher_loss": 0.13654255867004395 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.15854638814926147, + "learning_rate": 7.5769842417232894e-06, + "loss": 0.1958, + "step": 1747, + "teacher_loss": 0.19993603229522705 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.2754839360713959, + "learning_rate": 7.581321382102067e-06, + "loss": 0.255, + "step": 1748, + "teacher_loss": 0.2527409791946411 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.6465566158294678, + "learning_rate": 7.585658522480845e-06, + "loss": 0.271, + "step": 1749, + "teacher_loss": 0.22926507890224457 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.4403068423271179, + "learning_rate": 7.5899956628596214e-06, + "loss": 0.2639, + "step": 1750, + "teacher_loss": 0.24433518946170807 + }, + { + "epoch": 0.32, + "eval_exact_match": 79.46073793755913, + "eval_f1": 87.19280246592186, + "step": 1750 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.5703877210617065, + "learning_rate": 7.594332803238399e-06, + "loss": 0.2364, + "step": 1751, + "teacher_loss": 0.19933678209781647 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.37232139706611633, + "learning_rate": 7.598669943617175e-06, + "loss": 0.249, + "step": 1752, + "teacher_loss": 0.2352568507194519 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.4878363609313965, + "learning_rate": 7.603007083995952e-06, + "loss": 0.2832, + "step": 1753, + "teacher_loss": 0.26045307517051697 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.26372215151786804, + "learning_rate": 7.607344224374729e-06, + "loss": 0.1783, + "step": 1754, + "teacher_loss": 0.1687929928302765 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.5693401098251343, + "learning_rate": 7.611681364753507e-06, + "loss": 0.2294, + "step": 1755, + "teacher_loss": 0.19168388843536377 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.4138832986354828, + "learning_rate": 7.616018505132282e-06, + "loss": 0.22, + "step": 1756, + "teacher_loss": 0.1984454095363617 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.3313694894313812, + "learning_rate": 7.62035564551106e-06, + "loss": 0.2567, + "step": 1757, + "teacher_loss": 0.24837106466293335 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.12037936598062515, + "learning_rate": 7.6246927858898365e-06, + "loss": 0.1517, + "step": 1758, + "teacher_loss": 0.15522542595863342 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.29638609290122986, + "learning_rate": 7.629029926268614e-06, + "loss": 0.2482, + "step": 1759, + "teacher_loss": 0.24282485246658325 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.4636688232421875, + "learning_rate": 7.633367066647392e-06, + "loss": 0.2456, + "step": 1760, + "teacher_loss": 0.22139650583267212 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.15306410193443298, + "learning_rate": 7.637704207026168e-06, + "loss": 0.178, + "step": 1761, + "teacher_loss": 0.18078546226024628 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.46510058641433716, + "learning_rate": 7.642041347404944e-06, + "loss": 0.2946, + "step": 1762, + "teacher_loss": 0.27562767267227173 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.31164616346359253, + "learning_rate": 7.646378487783721e-06, + "loss": 0.2655, + "step": 1763, + "teacher_loss": 0.2603718638420105 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.34508180618286133, + "learning_rate": 7.650715628162499e-06, + "loss": 0.2153, + "step": 1764, + "teacher_loss": 0.20088255405426025 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.43271222710609436, + "learning_rate": 7.655052768541275e-06, + "loss": 0.2352, + "step": 1765, + "teacher_loss": 0.21324174106121063 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.48831379413604736, + "learning_rate": 7.659389908920052e-06, + "loss": 0.2546, + "step": 1766, + "teacher_loss": 0.22868716716766357 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.6186795234680176, + "learning_rate": 7.66372704929883e-06, + "loss": 0.2912, + "step": 1767, + "teacher_loss": 0.25483590364456177 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.6985093355178833, + "learning_rate": 7.668064189677606e-06, + "loss": 0.2768, + "step": 1768, + "teacher_loss": 0.22995707392692566 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.5289044380187988, + "learning_rate": 7.672401330056382e-06, + "loss": 0.242, + "step": 1769, + "teacher_loss": 0.21010245382785797 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.40761590003967285, + "learning_rate": 7.67673847043516e-06, + "loss": 0.2587, + "step": 1770, + "teacher_loss": 0.24210332334041595 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.3014605939388275, + "learning_rate": 7.681075610813937e-06, + "loss": 0.2236, + "step": 1771, + "teacher_loss": 0.21492652595043182 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.13060994446277618, + "learning_rate": 7.685412751192715e-06, + "loss": 0.2046, + "step": 1772, + "teacher_loss": 0.21283230185508728 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.29701292514801025, + "learning_rate": 7.68974989157149e-06, + "loss": 0.2458, + "step": 1773, + "teacher_loss": 0.24006086587905884 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.9122977256774902, + "learning_rate": 7.694087031950267e-06, + "loss": 0.4663, + "step": 1774, + "teacher_loss": 0.4167540371417999 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.36518651247024536, + "learning_rate": 7.698424172329044e-06, + "loss": 0.198, + "step": 1775, + "teacher_loss": 0.17946107685565948 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.5476127862930298, + "learning_rate": 7.702761312707822e-06, + "loss": 0.2344, + "step": 1776, + "teacher_loss": 0.19961652159690857 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.8547636270523071, + "learning_rate": 7.7070984530866e-06, + "loss": 0.2813, + "step": 1777, + "teacher_loss": 0.21758529543876648 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.7159478664398193, + "learning_rate": 7.711435593465375e-06, + "loss": 0.3423, + "step": 1778, + "teacher_loss": 0.30079466104507446 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.5323176383972168, + "learning_rate": 7.715772733844151e-06, + "loss": 0.3233, + "step": 1779, + "teacher_loss": 0.3000541925430298 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.4683852195739746, + "learning_rate": 7.720109874222929e-06, + "loss": 0.2345, + "step": 1780, + "teacher_loss": 0.2085098922252655 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.497215211391449, + "learning_rate": 7.724447014601707e-06, + "loss": 0.2614, + "step": 1781, + "teacher_loss": 0.23515203595161438 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.47892889380455017, + "learning_rate": 7.728784154980484e-06, + "loss": 0.2167, + "step": 1782, + "teacher_loss": 0.18751531839370728 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.9846535921096802, + "learning_rate": 7.73312129535926e-06, + "loss": 0.2994, + "step": 1783, + "teacher_loss": 0.22326920926570892 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.23477408289909363, + "learning_rate": 7.737458435738036e-06, + "loss": 0.1814, + "step": 1784, + "teacher_loss": 0.1755068451166153 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.3676646947860718, + "learning_rate": 7.741795576116814e-06, + "loss": 0.2297, + "step": 1785, + "teacher_loss": 0.21437790989875793 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.5792921781539917, + "learning_rate": 7.746132716495591e-06, + "loss": 0.3182, + "step": 1786, + "teacher_loss": 0.2891741991043091 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.32727351784706116, + "learning_rate": 7.750469856874367e-06, + "loss": 0.2589, + "step": 1787, + "teacher_loss": 0.25128233432769775 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.4121859669685364, + "learning_rate": 7.754806997253145e-06, + "loss": 0.2848, + "step": 1788, + "teacher_loss": 0.2706039547920227 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.59428471326828, + "learning_rate": 7.759144137631922e-06, + "loss": 0.26, + "step": 1789, + "teacher_loss": 0.222828209400177 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.3319006562232971, + "learning_rate": 7.763481278010698e-06, + "loss": 0.2146, + "step": 1790, + "teacher_loss": 0.20156118273735046 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.649003267288208, + "learning_rate": 7.767818418389474e-06, + "loss": 0.2607, + "step": 1791, + "teacher_loss": 0.21756139397621155 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.6035705804824829, + "learning_rate": 7.772155558768252e-06, + "loss": 0.2995, + "step": 1792, + "teacher_loss": 0.2657451629638672 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.2995683550834656, + "learning_rate": 7.77649269914703e-06, + "loss": 0.2358, + "step": 1793, + "teacher_loss": 0.22875794768333435 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.6605123281478882, + "learning_rate": 7.780829839525807e-06, + "loss": 0.3122, + "step": 1794, + "teacher_loss": 0.2735482156276703 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.4632706046104431, + "learning_rate": 7.785166979904583e-06, + "loss": 0.3876, + "step": 1795, + "teacher_loss": 0.3792091906070709 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.3336637318134308, + "learning_rate": 7.789504120283359e-06, + "loss": 0.2412, + "step": 1796, + "teacher_loss": 0.2309657335281372 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.23243877291679382, + "learning_rate": 7.793841260662137e-06, + "loss": 0.2041, + "step": 1797, + "teacher_loss": 0.20097434520721436 + }, + { + "compression_loss": 0.0, + "epoch": 0.32, + "label_loss": 0.4276742935180664, + "learning_rate": 7.798178401040914e-06, + "loss": 0.2569, + "step": 1798, + "teacher_loss": 0.23787957429885864 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.4468705654144287, + "learning_rate": 7.802515541419692e-06, + "loss": 0.2468, + "step": 1799, + "teacher_loss": 0.22459043562412262 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.30764341354370117, + "learning_rate": 7.806852681798468e-06, + "loss": 0.1724, + "step": 1800, + "teacher_loss": 0.15739014744758606 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.6394176483154297, + "learning_rate": 7.811189822177244e-06, + "loss": 0.2566, + "step": 1801, + "teacher_loss": 0.2140173316001892 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.4904750883579254, + "learning_rate": 7.815526962556021e-06, + "loss": 0.2335, + "step": 1802, + "teacher_loss": 0.2049984633922577 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.5518819093704224, + "learning_rate": 7.819864102934799e-06, + "loss": 0.2592, + "step": 1803, + "teacher_loss": 0.2266334891319275 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.413704514503479, + "learning_rate": 7.824201243313577e-06, + "loss": 0.2505, + "step": 1804, + "teacher_loss": 0.23231223225593567 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.13127173483371735, + "learning_rate": 7.828538383692353e-06, + "loss": 0.1518, + "step": 1805, + "teacher_loss": 0.1541162133216858 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.35204431414604187, + "learning_rate": 7.832875524071128e-06, + "loss": 0.1947, + "step": 1806, + "teacher_loss": 0.17721673846244812 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.5221048593521118, + "learning_rate": 7.837212664449906e-06, + "loss": 0.361, + "step": 1807, + "teacher_loss": 0.3430839478969574 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.45229244232177734, + "learning_rate": 7.841549804828684e-06, + "loss": 0.1856, + "step": 1808, + "teacher_loss": 0.1559458076953888 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.699129045009613, + "learning_rate": 7.84588694520746e-06, + "loss": 0.2985, + "step": 1809, + "teacher_loss": 0.25393155217170715 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.40289968252182007, + "learning_rate": 7.850224085586237e-06, + "loss": 0.1812, + "step": 1810, + "teacher_loss": 0.1565280258655548 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.3219031095504761, + "learning_rate": 7.854561225965013e-06, + "loss": 0.2199, + "step": 1811, + "teacher_loss": 0.208558589220047 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.6861417889595032, + "learning_rate": 7.85889836634379e-06, + "loss": 0.3468, + "step": 1812, + "teacher_loss": 0.3090746998786926 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.2918952703475952, + "learning_rate": 7.863235506722567e-06, + "loss": 0.2331, + "step": 1813, + "teacher_loss": 0.22654855251312256 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.39952874183654785, + "learning_rate": 7.867572647101344e-06, + "loss": 0.2108, + "step": 1814, + "teacher_loss": 0.18978875875473022 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.6358152627944946, + "learning_rate": 7.871909787480122e-06, + "loss": 0.245, + "step": 1815, + "teacher_loss": 0.20152145624160767 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.6016253232955933, + "learning_rate": 7.8762469278589e-06, + "loss": 0.2378, + "step": 1816, + "teacher_loss": 0.19731970131397247 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.5367385149002075, + "learning_rate": 7.880584068237676e-06, + "loss": 0.2971, + "step": 1817, + "teacher_loss": 0.2705267071723938 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.36606889963150024, + "learning_rate": 7.884921208616451e-06, + "loss": 0.1835, + "step": 1818, + "teacher_loss": 0.1631900668144226 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.32446256279945374, + "learning_rate": 7.889258348995229e-06, + "loss": 0.2417, + "step": 1819, + "teacher_loss": 0.23246291279792786 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.4756159484386444, + "learning_rate": 7.893595489374007e-06, + "loss": 0.2695, + "step": 1820, + "teacher_loss": 0.24654775857925415 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.7762539982795715, + "learning_rate": 7.897932629752784e-06, + "loss": 0.3051, + "step": 1821, + "teacher_loss": 0.25277841091156006 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.6988166570663452, + "learning_rate": 7.902269770131559e-06, + "loss": 0.2872, + "step": 1822, + "teacher_loss": 0.24142813682556152 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.8394314050674438, + "learning_rate": 7.906606910510336e-06, + "loss": 0.3168, + "step": 1823, + "teacher_loss": 0.2587035894393921 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.362871915102005, + "learning_rate": 7.910944050889114e-06, + "loss": 0.2392, + "step": 1824, + "teacher_loss": 0.22541531920433044 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.282943457365036, + "learning_rate": 7.915281191267891e-06, + "loss": 0.238, + "step": 1825, + "teacher_loss": 0.23297810554504395 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.5541413426399231, + "learning_rate": 7.919618331646669e-06, + "loss": 0.2908, + "step": 1826, + "teacher_loss": 0.26150596141815186 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.32107317447662354, + "learning_rate": 7.923955472025445e-06, + "loss": 0.1861, + "step": 1827, + "teacher_loss": 0.17114490270614624 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.22877249121665955, + "learning_rate": 7.928292612404221e-06, + "loss": 0.1852, + "step": 1828, + "teacher_loss": 0.18038858473300934 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.11934243142604828, + "learning_rate": 7.932629752782999e-06, + "loss": 0.1836, + "step": 1829, + "teacher_loss": 0.19077277183532715 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.2745369076728821, + "learning_rate": 7.936966893161776e-06, + "loss": 0.2884, + "step": 1830, + "teacher_loss": 0.28991490602493286 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.11852400004863739, + "learning_rate": 7.941304033540552e-06, + "loss": 0.2759, + "step": 1831, + "teacher_loss": 0.29337313771247864 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.47853589057922363, + "learning_rate": 7.94564117391933e-06, + "loss": 0.2427, + "step": 1832, + "teacher_loss": 0.21647757291793823 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.5026630759239197, + "learning_rate": 7.949978314298106e-06, + "loss": 0.2874, + "step": 1833, + "teacher_loss": 0.2634488344192505 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.21414603292942047, + "learning_rate": 7.954315454676883e-06, + "loss": 0.3274, + "step": 1834, + "teacher_loss": 0.33998072147369385 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.7230276465415955, + "learning_rate": 7.95865259505566e-06, + "loss": 0.3458, + "step": 1835, + "teacher_loss": 0.30385270714759827 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.37154334783554077, + "learning_rate": 7.962989735434437e-06, + "loss": 0.2456, + "step": 1836, + "teacher_loss": 0.23164236545562744 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.47035640478134155, + "learning_rate": 7.967326875813214e-06, + "loss": 0.227, + "step": 1837, + "teacher_loss": 0.1999903917312622 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.567501425743103, + "learning_rate": 7.971664016191992e-06, + "loss": 0.2413, + "step": 1838, + "teacher_loss": 0.20501494407653809 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.20935213565826416, + "learning_rate": 7.976001156570768e-06, + "loss": 0.1791, + "step": 1839, + "teacher_loss": 0.1757459044456482 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.37829095125198364, + "learning_rate": 7.980338296949544e-06, + "loss": 0.258, + "step": 1840, + "teacher_loss": 0.24460014700889587 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.5724002718925476, + "learning_rate": 7.984675437328322e-06, + "loss": 0.2893, + "step": 1841, + "teacher_loss": 0.2578818202018738 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.4348899722099304, + "learning_rate": 7.989012577707099e-06, + "loss": 0.4818, + "step": 1842, + "teacher_loss": 0.48706772923469543 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.2693827152252197, + "learning_rate": 7.993349718085877e-06, + "loss": 0.2129, + "step": 1843, + "teacher_loss": 0.20658424496650696 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.2475694715976715, + "learning_rate": 7.997686858464651e-06, + "loss": 0.2334, + "step": 1844, + "teacher_loss": 0.23186978697776794 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.4242836833000183, + "learning_rate": 8.002023998843429e-06, + "loss": 0.2774, + "step": 1845, + "teacher_loss": 0.2610628306865692 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.23021869361400604, + "learning_rate": 8.006361139222206e-06, + "loss": 0.1816, + "step": 1846, + "teacher_loss": 0.17618069052696228 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.22081930935382843, + "learning_rate": 8.010698279600984e-06, + "loss": 0.2012, + "step": 1847, + "teacher_loss": 0.19905588030815125 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 1.1445214748382568, + "learning_rate": 8.015035419979761e-06, + "loss": 0.2835, + "step": 1848, + "teacher_loss": 0.18778514862060547 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.4486529231071472, + "learning_rate": 8.019372560358537e-06, + "loss": 0.2777, + "step": 1849, + "teacher_loss": 0.2587205469608307 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.3795764148235321, + "learning_rate": 8.023709700737313e-06, + "loss": 0.2392, + "step": 1850, + "teacher_loss": 0.22363372147083282 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.46964168548583984, + "learning_rate": 8.028046841116091e-06, + "loss": 0.2773, + "step": 1851, + "teacher_loss": 0.25590986013412476 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.38283711671829224, + "learning_rate": 8.032383981494869e-06, + "loss": 0.2398, + "step": 1852, + "teacher_loss": 0.22394323348999023 + }, + { + "compression_loss": 0.0, + "epoch": 0.33, + "label_loss": 0.3926635682582855, + "learning_rate": 8.036721121873645e-06, + "loss": 0.3141, + "step": 1853, + "teacher_loss": 0.3053417205810547 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.38047635555267334, + "learning_rate": 8.041058262252422e-06, + "loss": 0.2538, + "step": 1854, + "teacher_loss": 0.23969416320323944 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.4139835238456726, + "learning_rate": 8.045395402631198e-06, + "loss": 0.5321, + "step": 1855, + "teacher_loss": 0.5452369451522827 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.6319215297698975, + "learning_rate": 8.049732543009976e-06, + "loss": 0.3191, + "step": 1856, + "teacher_loss": 0.28429466485977173 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.43632233142852783, + "learning_rate": 8.054069683388753e-06, + "loss": 0.2113, + "step": 1857, + "teacher_loss": 0.18629267811775208 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.28551626205444336, + "learning_rate": 8.05840682376753e-06, + "loss": 0.2541, + "step": 1858, + "teacher_loss": 0.25059640407562256 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.30213332176208496, + "learning_rate": 8.062743964146307e-06, + "loss": 0.2116, + "step": 1859, + "teacher_loss": 0.20150771737098694 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.5224485397338867, + "learning_rate": 8.067081104525083e-06, + "loss": 0.3494, + "step": 1860, + "teacher_loss": 0.3301740884780884 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.4405520558357239, + "learning_rate": 8.07141824490386e-06, + "loss": 0.1956, + "step": 1861, + "teacher_loss": 0.1684151589870453 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.7682600617408752, + "learning_rate": 8.075755385282636e-06, + "loss": 0.3387, + "step": 1862, + "teacher_loss": 0.29100501537323 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.4820849597454071, + "learning_rate": 8.080092525661414e-06, + "loss": 0.2092, + "step": 1863, + "teacher_loss": 0.17890843749046326 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.5034163594245911, + "learning_rate": 8.084429666040192e-06, + "loss": 0.3517, + "step": 1864, + "teacher_loss": 0.334837943315506 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.3912999927997589, + "learning_rate": 8.08876680641897e-06, + "loss": 0.1784, + "step": 1865, + "teacher_loss": 0.154697448015213 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.47309085726737976, + "learning_rate": 8.093103946797743e-06, + "loss": 0.28, + "step": 1866, + "teacher_loss": 0.2585349977016449 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.37852251529693604, + "learning_rate": 8.097441087176521e-06, + "loss": 0.1871, + "step": 1867, + "teacher_loss": 0.16582050919532776 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.3048030734062195, + "learning_rate": 8.101778227555299e-06, + "loss": 0.2125, + "step": 1868, + "teacher_loss": 0.20228412747383118 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.33019232749938965, + "learning_rate": 8.106115367934076e-06, + "loss": 0.3206, + "step": 1869, + "teacher_loss": 0.31949031352996826 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.2795789837837219, + "learning_rate": 8.110452508312854e-06, + "loss": 0.2815, + "step": 1870, + "teacher_loss": 0.28176620602607727 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.45445406436920166, + "learning_rate": 8.114789648691628e-06, + "loss": 0.2391, + "step": 1871, + "teacher_loss": 0.21512597799301147 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.39225757122039795, + "learning_rate": 8.119126789070406e-06, + "loss": 0.2256, + "step": 1872, + "teacher_loss": 0.20707634091377258 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.5685315132141113, + "learning_rate": 8.123463929449183e-06, + "loss": 0.2673, + "step": 1873, + "teacher_loss": 0.23386883735656738 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.8287670612335205, + "learning_rate": 8.127801069827961e-06, + "loss": 0.2696, + "step": 1874, + "teacher_loss": 0.20750975608825684 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.5341501235961914, + "learning_rate": 8.132138210206737e-06, + "loss": 0.2647, + "step": 1875, + "teacher_loss": 0.23473864793777466 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.5260509252548218, + "learning_rate": 8.136475350585515e-06, + "loss": 0.2295, + "step": 1876, + "teacher_loss": 0.19658933579921722 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.24463702738285065, + "learning_rate": 8.14081249096429e-06, + "loss": 0.1556, + "step": 1877, + "teacher_loss": 0.14567025005817413 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.5799046754837036, + "learning_rate": 8.145149631343068e-06, + "loss": 0.2654, + "step": 1878, + "teacher_loss": 0.23040470480918884 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.36795681715011597, + "learning_rate": 8.149486771721846e-06, + "loss": 0.2787, + "step": 1879, + "teacher_loss": 0.26876747608184814 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.44979578256607056, + "learning_rate": 8.153823912100622e-06, + "loss": 0.2507, + "step": 1880, + "teacher_loss": 0.22854046523571014 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.3928729295730591, + "learning_rate": 8.1581610524794e-06, + "loss": 0.2171, + "step": 1881, + "teacher_loss": 0.19758939743041992 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.32123103737831116, + "learning_rate": 8.162498192858175e-06, + "loss": 0.2037, + "step": 1882, + "teacher_loss": 0.19061529636383057 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.35667696595191956, + "learning_rate": 8.166835333236953e-06, + "loss": 0.2124, + "step": 1883, + "teacher_loss": 0.1963464319705963 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.3588745594024658, + "learning_rate": 8.171172473615729e-06, + "loss": 0.198, + "step": 1884, + "teacher_loss": 0.1800839900970459 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.2592361569404602, + "learning_rate": 8.175509613994506e-06, + "loss": 0.1916, + "step": 1885, + "teacher_loss": 0.1841181218624115 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.7408689260482788, + "learning_rate": 8.179846754373284e-06, + "loss": 0.2605, + "step": 1886, + "teacher_loss": 0.20710447430610657 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.9991213083267212, + "learning_rate": 8.184183894752062e-06, + "loss": 0.5528, + "step": 1887, + "teacher_loss": 0.5031658411026001 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.6155551671981812, + "learning_rate": 8.188521035130836e-06, + "loss": 0.2379, + "step": 1888, + "teacher_loss": 0.19594156742095947 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.43069207668304443, + "learning_rate": 8.192858175509614e-06, + "loss": 0.2532, + "step": 1889, + "teacher_loss": 0.23347456753253937 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.24603256583213806, + "learning_rate": 8.197195315888391e-06, + "loss": 0.1704, + "step": 1890, + "teacher_loss": 0.1620493233203888 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.38412415981292725, + "learning_rate": 8.201532456267169e-06, + "loss": 0.2486, + "step": 1891, + "teacher_loss": 0.23351064324378967 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.22729331254959106, + "learning_rate": 8.205869596645946e-06, + "loss": 0.207, + "step": 1892, + "teacher_loss": 0.2047477513551712 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.41279059648513794, + "learning_rate": 8.21020673702472e-06, + "loss": 0.3292, + "step": 1893, + "teacher_loss": 0.31993693113327026 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.43090611696243286, + "learning_rate": 8.214543877403498e-06, + "loss": 0.2179, + "step": 1894, + "teacher_loss": 0.19422873854637146 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.40354040265083313, + "learning_rate": 8.218881017782276e-06, + "loss": 0.205, + "step": 1895, + "teacher_loss": 0.18296034634113312 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.4139193296432495, + "learning_rate": 8.223218158161053e-06, + "loss": 0.2601, + "step": 1896, + "teacher_loss": 0.2430555820465088 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.5586681365966797, + "learning_rate": 8.22755529853983e-06, + "loss": 0.2272, + "step": 1897, + "teacher_loss": 0.19037766754627228 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.11299015581607819, + "learning_rate": 8.231892438918607e-06, + "loss": 0.1714, + "step": 1898, + "teacher_loss": 0.17792829871177673 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.9816170334815979, + "learning_rate": 8.236229579297383e-06, + "loss": 0.2945, + "step": 1899, + "teacher_loss": 0.21817022562026978 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.48324888944625854, + "learning_rate": 8.24056671967616e-06, + "loss": 0.2699, + "step": 1900, + "teacher_loss": 0.24622660875320435 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.7907062768936157, + "learning_rate": 8.244903860054938e-06, + "loss": 0.314, + "step": 1901, + "teacher_loss": 0.261014461517334 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.5526193976402283, + "learning_rate": 8.249241000433714e-06, + "loss": 0.2596, + "step": 1902, + "teacher_loss": 0.2270744889974594 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.49716830253601074, + "learning_rate": 8.253578140812492e-06, + "loss": 0.2379, + "step": 1903, + "teacher_loss": 0.2091204673051834 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.5063827037811279, + "learning_rate": 8.257915281191268e-06, + "loss": 0.3117, + "step": 1904, + "teacher_loss": 0.2900693714618683 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.49774765968322754, + "learning_rate": 8.262252421570045e-06, + "loss": 0.2549, + "step": 1905, + "teacher_loss": 0.22790485620498657 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.07791583985090256, + "learning_rate": 8.266589561948821e-06, + "loss": 0.1852, + "step": 1906, + "teacher_loss": 0.19706636667251587 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.3394966721534729, + "learning_rate": 8.270926702327599e-06, + "loss": 0.1758, + "step": 1907, + "teacher_loss": 0.1576264500617981 + }, + { + "compression_loss": 0.0, + "epoch": 0.34, + "label_loss": 0.14728473126888275, + "learning_rate": 8.275263842706376e-06, + "loss": 0.1815, + "step": 1908, + "teacher_loss": 0.18535292148590088 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.3011743724346161, + "learning_rate": 8.279600983085152e-06, + "loss": 0.2428, + "step": 1909, + "teacher_loss": 0.23629418015480042 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.23928838968276978, + "learning_rate": 8.283938123463928e-06, + "loss": 0.2582, + "step": 1910, + "teacher_loss": 0.2603057026863098 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.6685923933982849, + "learning_rate": 8.288275263842706e-06, + "loss": 0.2905, + "step": 1911, + "teacher_loss": 0.24852657318115234 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.2540750801563263, + "learning_rate": 8.292612404221484e-06, + "loss": 0.2204, + "step": 1912, + "teacher_loss": 0.21667227149009705 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.2171754240989685, + "learning_rate": 8.296949544600261e-06, + "loss": 0.2504, + "step": 1913, + "teacher_loss": 0.25403815507888794 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.5087028741836548, + "learning_rate": 8.301286684979039e-06, + "loss": 0.2456, + "step": 1914, + "teacher_loss": 0.2163279950618744 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.3206031620502472, + "learning_rate": 8.305623825357813e-06, + "loss": 0.2478, + "step": 1915, + "teacher_loss": 0.2397019863128662 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.3642832636833191, + "learning_rate": 8.30996096573659e-06, + "loss": 0.1778, + "step": 1916, + "teacher_loss": 0.15712422132492065 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.33998531103134155, + "learning_rate": 8.314298106115368e-06, + "loss": 0.2724, + "step": 1917, + "teacher_loss": 0.26492470502853394 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.8645210266113281, + "learning_rate": 8.318635246494146e-06, + "loss": 0.3185, + "step": 1918, + "teacher_loss": 0.25780972838401794 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.5151434540748596, + "learning_rate": 8.322972386872922e-06, + "loss": 0.2398, + "step": 1919, + "teacher_loss": 0.2092100828886032 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.7129989266395569, + "learning_rate": 8.327309527251698e-06, + "loss": 0.3279, + "step": 1920, + "teacher_loss": 0.2850641906261444 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.6337594985961914, + "learning_rate": 8.331646667630475e-06, + "loss": 0.2171, + "step": 1921, + "teacher_loss": 0.17081128060817719 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.2058110535144806, + "learning_rate": 8.335983808009253e-06, + "loss": 0.2572, + "step": 1922, + "teacher_loss": 0.26286107301712036 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.5388700366020203, + "learning_rate": 8.34032094838803e-06, + "loss": 0.2236, + "step": 1923, + "teacher_loss": 0.18857789039611816 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.4667588472366333, + "learning_rate": 8.344658088766807e-06, + "loss": 0.2655, + "step": 1924, + "teacher_loss": 0.24310317635536194 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.7177525758743286, + "learning_rate": 8.348995229145584e-06, + "loss": 0.3799, + "step": 1925, + "teacher_loss": 0.3423910140991211 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.21521690487861633, + "learning_rate": 8.35333236952436e-06, + "loss": 0.2158, + "step": 1926, + "teacher_loss": 0.2158641517162323 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.6600738763809204, + "learning_rate": 8.357669509903138e-06, + "loss": 0.2781, + "step": 1927, + "teacher_loss": 0.23561862111091614 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 1.013218641281128, + "learning_rate": 8.362006650281914e-06, + "loss": 0.3579, + "step": 1928, + "teacher_loss": 0.28506553173065186 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.5831384062767029, + "learning_rate": 8.366343790660691e-06, + "loss": 0.2954, + "step": 1929, + "teacher_loss": 0.2633988857269287 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.5085728168487549, + "learning_rate": 8.370680931039469e-06, + "loss": 0.2627, + "step": 1930, + "teacher_loss": 0.23541685938835144 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.4259492754936218, + "learning_rate": 8.375018071418245e-06, + "loss": 0.2922, + "step": 1931, + "teacher_loss": 0.2773159444332123 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.41211897134780884, + "learning_rate": 8.37935521179702e-06, + "loss": 0.2966, + "step": 1932, + "teacher_loss": 0.28374889492988586 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.4677123725414276, + "learning_rate": 8.383692352175798e-06, + "loss": 0.3102, + "step": 1933, + "teacher_loss": 0.29270684719085693 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.4804128408432007, + "learning_rate": 8.388029492554576e-06, + "loss": 0.2588, + "step": 1934, + "teacher_loss": 0.23421144485473633 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.5016388297080994, + "learning_rate": 8.392366632933354e-06, + "loss": 0.1815, + "step": 1935, + "teacher_loss": 0.14591535925865173 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.19861455261707306, + "learning_rate": 8.396703773312131e-06, + "loss": 0.202, + "step": 1936, + "teacher_loss": 0.2024187445640564 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.23110893368721008, + "learning_rate": 8.401040913690905e-06, + "loss": 0.1938, + "step": 1937, + "teacher_loss": 0.1896395981311798 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.23265612125396729, + "learning_rate": 8.405378054069683e-06, + "loss": 0.2173, + "step": 1938, + "teacher_loss": 0.21555998921394348 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.885893702507019, + "learning_rate": 8.40971519444846e-06, + "loss": 0.3835, + "step": 1939, + "teacher_loss": 0.32764244079589844 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.21131856739521027, + "learning_rate": 8.414052334827238e-06, + "loss": 0.1986, + "step": 1940, + "teacher_loss": 0.19723433256149292 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.22887814044952393, + "learning_rate": 8.418389475206014e-06, + "loss": 0.2178, + "step": 1941, + "teacher_loss": 0.21652166545391083 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.2977261245250702, + "learning_rate": 8.42272661558479e-06, + "loss": 0.2457, + "step": 1942, + "teacher_loss": 0.23992031812667847 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.49699923396110535, + "learning_rate": 8.427063755963568e-06, + "loss": 0.3613, + "step": 1943, + "teacher_loss": 0.34621569514274597 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 1.1092720031738281, + "learning_rate": 8.431400896342345e-06, + "loss": 0.3127, + "step": 1944, + "teacher_loss": 0.22424188256263733 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.6106966733932495, + "learning_rate": 8.435738036721123e-06, + "loss": 0.2614, + "step": 1945, + "teacher_loss": 0.22261390089988708 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.4760059714317322, + "learning_rate": 8.440075177099899e-06, + "loss": 0.2849, + "step": 1946, + "teacher_loss": 0.2637171745300293 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.2203870415687561, + "learning_rate": 8.444412317478677e-06, + "loss": 0.1845, + "step": 1947, + "teacher_loss": 0.18046513199806213 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.17986935377120972, + "learning_rate": 8.448749457857453e-06, + "loss": 0.1808, + "step": 1948, + "teacher_loss": 0.18089967966079712 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.6490846872329712, + "learning_rate": 8.45308659823623e-06, + "loss": 0.2337, + "step": 1949, + "teacher_loss": 0.18756935000419617 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.429772287607193, + "learning_rate": 8.457423738615006e-06, + "loss": 0.2072, + "step": 1950, + "teacher_loss": 0.1824311465024948 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.2201623022556305, + "learning_rate": 8.461760878993784e-06, + "loss": 0.2901, + "step": 1951, + "teacher_loss": 0.2978992462158203 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.31017157435417175, + "learning_rate": 8.466098019372561e-06, + "loss": 0.2671, + "step": 1952, + "teacher_loss": 0.26235055923461914 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.787773847579956, + "learning_rate": 8.470435159751337e-06, + "loss": 0.2896, + "step": 1953, + "teacher_loss": 0.23421788215637207 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.3537115454673767, + "learning_rate": 8.474772300130113e-06, + "loss": 0.2512, + "step": 1954, + "teacher_loss": 0.23979242146015167 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.2504391372203827, + "learning_rate": 8.47910944050889e-06, + "loss": 0.25, + "step": 1955, + "teacher_loss": 0.2500060498714447 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.49551188945770264, + "learning_rate": 8.483446580887668e-06, + "loss": 0.2184, + "step": 1956, + "teacher_loss": 0.18759498000144958 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.6004061102867126, + "learning_rate": 8.487783721266446e-06, + "loss": 0.2364, + "step": 1957, + "teacher_loss": 0.19599968194961548 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.3101799488067627, + "learning_rate": 8.492120861645222e-06, + "loss": 0.2937, + "step": 1958, + "teacher_loss": 0.2918395698070526 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.6094840168952942, + "learning_rate": 8.496458002023998e-06, + "loss": 0.3062, + "step": 1959, + "teacher_loss": 0.2724979519844055 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.9341248273849487, + "learning_rate": 8.500795142402776e-06, + "loss": 0.2208, + "step": 1960, + "teacher_loss": 0.14158010482788086 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.8260982036590576, + "learning_rate": 8.505132282781553e-06, + "loss": 0.2861, + "step": 1961, + "teacher_loss": 0.22607558965682983 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.4312358498573303, + "learning_rate": 8.50946942316033e-06, + "loss": 0.2948, + "step": 1962, + "teacher_loss": 0.2796017825603485 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.4646150469779968, + "learning_rate": 8.513806563539107e-06, + "loss": 0.2254, + "step": 1963, + "teacher_loss": 0.19886037707328796 + }, + { + "compression_loss": 0.0, + "epoch": 0.35, + "label_loss": 0.4066265821456909, + "learning_rate": 8.518143703917883e-06, + "loss": 0.223, + "step": 1964, + "teacher_loss": 0.20255476236343384 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.3543008863925934, + "learning_rate": 8.52248084429666e-06, + "loss": 0.1826, + "step": 1965, + "teacher_loss": 0.1635093241930008 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.273688405752182, + "learning_rate": 8.526817984675438e-06, + "loss": 0.1896, + "step": 1966, + "teacher_loss": 0.18022316694259644 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.4326523542404175, + "learning_rate": 8.531155125054216e-06, + "loss": 0.3011, + "step": 1967, + "teacher_loss": 0.28644561767578125 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.30893221497535706, + "learning_rate": 8.535492265432991e-06, + "loss": 0.2587, + "step": 1968, + "teacher_loss": 0.25310301780700684 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.22304552793502808, + "learning_rate": 8.539829405811767e-06, + "loss": 0.2132, + "step": 1969, + "teacher_loss": 0.2121340036392212 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.20896759629249573, + "learning_rate": 8.544166546190545e-06, + "loss": 0.2105, + "step": 1970, + "teacher_loss": 0.21067768335342407 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.5303832292556763, + "learning_rate": 8.548503686569323e-06, + "loss": 0.196, + "step": 1971, + "teacher_loss": 0.15886105597019196 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.3733620047569275, + "learning_rate": 8.552840826948099e-06, + "loss": 0.1734, + "step": 1972, + "teacher_loss": 0.15118274092674255 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.4080936908721924, + "learning_rate": 8.557177967326876e-06, + "loss": 0.1998, + "step": 1973, + "teacher_loss": 0.17666301131248474 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.8268827795982361, + "learning_rate": 8.561515107705654e-06, + "loss": 0.2386, + "step": 1974, + "teacher_loss": 0.17326869070529938 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.18118637800216675, + "learning_rate": 8.56585224808443e-06, + "loss": 0.189, + "step": 1975, + "teacher_loss": 0.18990099430084229 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.7398730516433716, + "learning_rate": 8.570189388463206e-06, + "loss": 0.272, + "step": 1976, + "teacher_loss": 0.22005236148834229 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.15803629159927368, + "learning_rate": 8.574526528841983e-06, + "loss": 0.1875, + "step": 1977, + "teacher_loss": 0.19074061512947083 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.2643776535987854, + "learning_rate": 8.578863669220761e-06, + "loss": 0.1901, + "step": 1978, + "teacher_loss": 0.18189160525798798 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.2945954501628876, + "learning_rate": 8.583200809599539e-06, + "loss": 0.2082, + "step": 1979, + "teacher_loss": 0.19857361912727356 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.30913832783699036, + "learning_rate": 8.587537949978314e-06, + "loss": 0.2251, + "step": 1980, + "teacher_loss": 0.2157401144504547 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.558434247970581, + "learning_rate": 8.59187509035709e-06, + "loss": 0.2544, + "step": 1981, + "teacher_loss": 0.22056470811367035 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.20248591899871826, + "learning_rate": 8.596212230735868e-06, + "loss": 0.1903, + "step": 1982, + "teacher_loss": 0.18894410133361816 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.37931349873542786, + "learning_rate": 8.600549371114646e-06, + "loss": 0.2783, + "step": 1983, + "teacher_loss": 0.2670256793498993 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.381530225276947, + "learning_rate": 8.604886511493423e-06, + "loss": 0.1666, + "step": 1984, + "teacher_loss": 0.14266520738601685 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.31294405460357666, + "learning_rate": 8.6092236518722e-06, + "loss": 0.2126, + "step": 1985, + "teacher_loss": 0.20144245028495789 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.318248987197876, + "learning_rate": 8.613560792250975e-06, + "loss": 0.2728, + "step": 1986, + "teacher_loss": 0.2677832245826721 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.39165812730789185, + "learning_rate": 8.617897932629753e-06, + "loss": 0.254, + "step": 1987, + "teacher_loss": 0.238724485039711 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.3828313946723938, + "learning_rate": 8.62223507300853e-06, + "loss": 0.2183, + "step": 1988, + "teacher_loss": 0.20003418624401093 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.5963172912597656, + "learning_rate": 8.626572213387308e-06, + "loss": 0.3147, + "step": 1989, + "teacher_loss": 0.28335729241371155 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.18338504433631897, + "learning_rate": 8.630909353766084e-06, + "loss": 0.174, + "step": 1990, + "teacher_loss": 0.17292268574237823 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.7385209798812866, + "learning_rate": 8.63524649414486e-06, + "loss": 0.3269, + "step": 1991, + "teacher_loss": 0.28118351101875305 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.5545350313186646, + "learning_rate": 8.639583634523637e-06, + "loss": 0.2669, + "step": 1992, + "teacher_loss": 0.23492911458015442 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.2846753001213074, + "learning_rate": 8.643920774902415e-06, + "loss": 0.1898, + "step": 1993, + "teacher_loss": 0.17925618588924408 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.6343709826469421, + "learning_rate": 8.648257915281191e-06, + "loss": 0.2883, + "step": 1994, + "teacher_loss": 0.24989870190620422 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.5843238830566406, + "learning_rate": 8.652595055659969e-06, + "loss": 0.3305, + "step": 1995, + "teacher_loss": 0.3022594451904297 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.23642893135547638, + "learning_rate": 8.656932196038746e-06, + "loss": 0.228, + "step": 1996, + "teacher_loss": 0.2270338088274002 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.3431861102581024, + "learning_rate": 8.661269336417522e-06, + "loss": 0.232, + "step": 1997, + "teacher_loss": 0.2196808159351349 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.41902440786361694, + "learning_rate": 8.665606476796298e-06, + "loss": 0.2062, + "step": 1998, + "teacher_loss": 0.18254628777503967 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.25876468420028687, + "learning_rate": 8.669943617175076e-06, + "loss": 0.2687, + "step": 1999, + "teacher_loss": 0.2698257267475128 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.7336583137512207, + "learning_rate": 8.674280757553853e-06, + "loss": 0.3261, + "step": 2000, + "teacher_loss": 0.2807842493057251 + }, + { + "epoch": 0.36, + "eval_exact_match": 79.9526963103122, + "eval_f1": 87.32404141032497, + "step": 2000 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.588871955871582, + "learning_rate": 8.678617897932631e-06, + "loss": 0.2493, + "step": 2001, + "teacher_loss": 0.2115837037563324 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.7952429056167603, + "learning_rate": 8.682955038311407e-06, + "loss": 0.2277, + "step": 2002, + "teacher_loss": 0.1646772027015686 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.5486610531806946, + "learning_rate": 8.687292178690183e-06, + "loss": 0.2259, + "step": 2003, + "teacher_loss": 0.19000765681266785 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 1.0395715236663818, + "learning_rate": 8.69162931906896e-06, + "loss": 0.2831, + "step": 2004, + "teacher_loss": 0.1990422010421753 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.2756597697734833, + "learning_rate": 8.695966459447738e-06, + "loss": 0.1824, + "step": 2005, + "teacher_loss": 0.17209115624427795 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.33922529220581055, + "learning_rate": 8.700303599826516e-06, + "loss": 0.2085, + "step": 2006, + "teacher_loss": 0.19396401941776276 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.33040666580200195, + "learning_rate": 8.704640740205292e-06, + "loss": 0.241, + "step": 2007, + "teacher_loss": 0.2310769110918045 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.3931776285171509, + "learning_rate": 8.708977880584068e-06, + "loss": 0.2487, + "step": 2008, + "teacher_loss": 0.23267048597335815 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.2084324061870575, + "learning_rate": 8.713315020962845e-06, + "loss": 0.2092, + "step": 2009, + "teacher_loss": 0.20931166410446167 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.5330890417098999, + "learning_rate": 8.717652161341623e-06, + "loss": 0.2161, + "step": 2010, + "teacher_loss": 0.18085786700248718 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.288196325302124, + "learning_rate": 8.7219893017204e-06, + "loss": 0.2688, + "step": 2011, + "teacher_loss": 0.2666383385658264 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.25869935750961304, + "learning_rate": 8.726326442099176e-06, + "loss": 0.1881, + "step": 2012, + "teacher_loss": 0.18026980757713318 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.7456955909729004, + "learning_rate": 8.730663582477952e-06, + "loss": 0.2768, + "step": 2013, + "teacher_loss": 0.22473645210266113 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.39600855112075806, + "learning_rate": 8.73500072285673e-06, + "loss": 0.2457, + "step": 2014, + "teacher_loss": 0.22903785109519958 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.3784370422363281, + "learning_rate": 8.739337863235507e-06, + "loss": 0.1831, + "step": 2015, + "teacher_loss": 0.16134443879127502 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.3339955806732178, + "learning_rate": 8.743675003614283e-06, + "loss": 0.1901, + "step": 2016, + "teacher_loss": 0.17406892776489258 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.19792324304580688, + "learning_rate": 8.748012143993061e-06, + "loss": 0.1823, + "step": 2017, + "teacher_loss": 0.1805633008480072 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.25825729966163635, + "learning_rate": 8.752349284371837e-06, + "loss": 0.2273, + "step": 2018, + "teacher_loss": 0.22387373447418213 + }, + { + "compression_loss": 0.0, + "epoch": 0.36, + "label_loss": 0.48217809200286865, + "learning_rate": 8.756686424750615e-06, + "loss": 0.2582, + "step": 2019, + "teacher_loss": 0.23334653675556183 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.21896736323833466, + "learning_rate": 8.761023565129392e-06, + "loss": 0.1825, + "step": 2020, + "teacher_loss": 0.1784682273864746 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.6242147088050842, + "learning_rate": 8.765360705508168e-06, + "loss": 0.2849, + "step": 2021, + "teacher_loss": 0.24722039699554443 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.6768083572387695, + "learning_rate": 8.769697845886946e-06, + "loss": 0.3201, + "step": 2022, + "teacher_loss": 0.2804555892944336 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.7817792892456055, + "learning_rate": 8.774034986265723e-06, + "loss": 0.2801, + "step": 2023, + "teacher_loss": 0.22440074384212494 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.756580114364624, + "learning_rate": 8.7783721266445e-06, + "loss": 0.2759, + "step": 2024, + "teacher_loss": 0.2225157916545868 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.5330575108528137, + "learning_rate": 8.782709267023275e-06, + "loss": 0.2556, + "step": 2025, + "teacher_loss": 0.22478607296943665 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.7903412580490112, + "learning_rate": 8.787046407402053e-06, + "loss": 0.2729, + "step": 2026, + "teacher_loss": 0.21535338461399078 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.5399750471115112, + "learning_rate": 8.79138354778083e-06, + "loss": 0.4124, + "step": 2027, + "teacher_loss": 0.398262083530426 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.2639715075492859, + "learning_rate": 8.795720688159608e-06, + "loss": 0.1791, + "step": 2028, + "teacher_loss": 0.1696719527244568 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.2311077117919922, + "learning_rate": 8.800057828538382e-06, + "loss": 0.1831, + "step": 2029, + "teacher_loss": 0.17779403924942017 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.4688766300678253, + "learning_rate": 8.80439496891716e-06, + "loss": 0.2569, + "step": 2030, + "teacher_loss": 0.2333940714597702 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.2640064060688019, + "learning_rate": 8.808732109295938e-06, + "loss": 0.1591, + "step": 2031, + "teacher_loss": 0.14747260510921478 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.6614512801170349, + "learning_rate": 8.813069249674715e-06, + "loss": 0.2616, + "step": 2032, + "teacher_loss": 0.21721000969409943 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.3999093174934387, + "learning_rate": 8.817406390053493e-06, + "loss": 0.1848, + "step": 2033, + "teacher_loss": 0.16086967289447784 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.7947626113891602, + "learning_rate": 8.821743530432269e-06, + "loss": 0.301, + "step": 2034, + "teacher_loss": 0.24614980816841125 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.5450666546821594, + "learning_rate": 8.826080670811045e-06, + "loss": 0.2814, + "step": 2035, + "teacher_loss": 0.2520662248134613 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.5424337387084961, + "learning_rate": 8.830417811189822e-06, + "loss": 0.3092, + "step": 2036, + "teacher_loss": 0.2832494378089905 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.6603044271469116, + "learning_rate": 8.8347549515686e-06, + "loss": 0.4978, + "step": 2037, + "teacher_loss": 0.47970038652420044 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.370551735162735, + "learning_rate": 8.839092091947376e-06, + "loss": 0.2222, + "step": 2038, + "teacher_loss": 0.2057531476020813 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.41036537289619446, + "learning_rate": 8.843429232326153e-06, + "loss": 0.2172, + "step": 2039, + "teacher_loss": 0.19574546813964844 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.36894387006759644, + "learning_rate": 8.84776637270493e-06, + "loss": 0.2151, + "step": 2040, + "teacher_loss": 0.19801267981529236 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.7365674376487732, + "learning_rate": 8.852103513083707e-06, + "loss": 0.2903, + "step": 2041, + "teacher_loss": 0.2407400757074356 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.3448624908924103, + "learning_rate": 8.856440653462485e-06, + "loss": 0.1811, + "step": 2042, + "teacher_loss": 0.16288888454437256 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.24986150860786438, + "learning_rate": 8.86077779384126e-06, + "loss": 0.2289, + "step": 2043, + "teacher_loss": 0.22653119266033173 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.31847259402275085, + "learning_rate": 8.865114934220038e-06, + "loss": 0.291, + "step": 2044, + "teacher_loss": 0.288002610206604 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.49197614192962646, + "learning_rate": 8.869452074598816e-06, + "loss": 0.2302, + "step": 2045, + "teacher_loss": 0.20107513666152954 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.702581524848938, + "learning_rate": 8.873789214977592e-06, + "loss": 0.2921, + "step": 2046, + "teacher_loss": 0.24647703766822815 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.5075963139533997, + "learning_rate": 8.878126355356368e-06, + "loss": 0.2328, + "step": 2047, + "teacher_loss": 0.20224273204803467 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.6403321623802185, + "learning_rate": 8.882463495735145e-06, + "loss": 0.2642, + "step": 2048, + "teacher_loss": 0.22235816717147827 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.35267117619514465, + "learning_rate": 8.886800636113923e-06, + "loss": 0.235, + "step": 2049, + "teacher_loss": 0.2218785583972931 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.12514732778072357, + "learning_rate": 8.8911377764927e-06, + "loss": 0.1653, + "step": 2050, + "teacher_loss": 0.1697188913822174 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.3797406852245331, + "learning_rate": 8.895474916871475e-06, + "loss": 0.2321, + "step": 2051, + "teacher_loss": 0.21572750806808472 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.6932750940322876, + "learning_rate": 8.899812057250252e-06, + "loss": 0.342, + "step": 2052, + "teacher_loss": 0.3029642105102539 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.27024197578430176, + "learning_rate": 8.90414919762903e-06, + "loss": 0.2503, + "step": 2053, + "teacher_loss": 0.2481074333190918 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.4812772274017334, + "learning_rate": 8.908486338007808e-06, + "loss": 0.3006, + "step": 2054, + "teacher_loss": 0.2804809808731079 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.13784107565879822, + "learning_rate": 8.912823478386585e-06, + "loss": 0.1788, + "step": 2055, + "teacher_loss": 0.1833113133907318 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.38942182064056396, + "learning_rate": 8.917160618765361e-06, + "loss": 0.2152, + "step": 2056, + "teacher_loss": 0.195870041847229 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.35631370544433594, + "learning_rate": 8.921497759144137e-06, + "loss": 0.2804, + "step": 2057, + "teacher_loss": 0.27196890115737915 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.21285969018936157, + "learning_rate": 8.925834899522915e-06, + "loss": 0.2008, + "step": 2058, + "teacher_loss": 0.19941532611846924 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.450714111328125, + "learning_rate": 8.930172039901692e-06, + "loss": 0.2127, + "step": 2059, + "teacher_loss": 0.18625584244728088 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.3550308346748352, + "learning_rate": 8.934509180280468e-06, + "loss": 0.2122, + "step": 2060, + "teacher_loss": 0.19637781381607056 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.3289303779602051, + "learning_rate": 8.938846320659246e-06, + "loss": 0.2421, + "step": 2061, + "teacher_loss": 0.23245888948440552 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.5442341566085815, + "learning_rate": 8.943183461038022e-06, + "loss": 0.2404, + "step": 2062, + "teacher_loss": 0.20665675401687622 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.5283471941947937, + "learning_rate": 8.9475206014168e-06, + "loss": 0.2716, + "step": 2063, + "teacher_loss": 0.24312719702720642 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.3343338668346405, + "learning_rate": 8.951857741795577e-06, + "loss": 0.1686, + "step": 2064, + "teacher_loss": 0.15023337304592133 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.39035654067993164, + "learning_rate": 8.956194882174353e-06, + "loss": 0.1989, + "step": 2065, + "teacher_loss": 0.17762787640094757 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.3479209542274475, + "learning_rate": 8.96053202255313e-06, + "loss": 0.2541, + "step": 2066, + "teacher_loss": 0.24369065463542938 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.20199933648109436, + "learning_rate": 8.964869162931907e-06, + "loss": 0.2247, + "step": 2067, + "teacher_loss": 0.22719204425811768 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.5131199955940247, + "learning_rate": 8.969206303310684e-06, + "loss": 0.2447, + "step": 2068, + "teacher_loss": 0.2149207890033722 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.37718072533607483, + "learning_rate": 8.97354344368946e-06, + "loss": 0.2398, + "step": 2069, + "teacher_loss": 0.224510058760643 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.24448087811470032, + "learning_rate": 8.977880584068238e-06, + "loss": 0.1891, + "step": 2070, + "teacher_loss": 0.182974711060524 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.7908239960670471, + "learning_rate": 8.982217724447015e-06, + "loss": 0.3191, + "step": 2071, + "teacher_loss": 0.26663511991500854 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.5636007189750671, + "learning_rate": 8.986554864825793e-06, + "loss": 0.2119, + "step": 2072, + "teacher_loss": 0.172852024435997 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.5297435522079468, + "learning_rate": 8.990892005204567e-06, + "loss": 0.2453, + "step": 2073, + "teacher_loss": 0.213679701089859 + }, + { + "compression_loss": 0.0, + "epoch": 0.37, + "label_loss": 0.5025248527526855, + "learning_rate": 8.995229145583345e-06, + "loss": 0.2351, + "step": 2074, + "teacher_loss": 0.20539703965187073 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.4148525595664978, + "learning_rate": 8.999566285962122e-06, + "loss": 0.2804, + "step": 2075, + "teacher_loss": 0.26550984382629395 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.594307005405426, + "learning_rate": 9.0039034263409e-06, + "loss": 0.2604, + "step": 2076, + "teacher_loss": 0.22331345081329346 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.4579019546508789, + "learning_rate": 9.008240566719678e-06, + "loss": 0.1722, + "step": 2077, + "teacher_loss": 0.140457421541214 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.41960811614990234, + "learning_rate": 9.012577707098452e-06, + "loss": 0.1757, + "step": 2078, + "teacher_loss": 0.14856143295764923 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.2651267647743225, + "learning_rate": 9.01691484747723e-06, + "loss": 0.2083, + "step": 2079, + "teacher_loss": 0.2020222246646881 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.6848872900009155, + "learning_rate": 9.021251987856007e-06, + "loss": 0.2707, + "step": 2080, + "teacher_loss": 0.224684938788414 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.7914320230484009, + "learning_rate": 9.025589128234785e-06, + "loss": 0.2626, + "step": 2081, + "teacher_loss": 0.2038527876138687 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.296835720539093, + "learning_rate": 9.02992626861356e-06, + "loss": 0.1991, + "step": 2082, + "teacher_loss": 0.1882028877735138 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.48113179206848145, + "learning_rate": 9.034263408992338e-06, + "loss": 0.2101, + "step": 2083, + "teacher_loss": 0.17998534440994263 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.37829673290252686, + "learning_rate": 9.038600549371114e-06, + "loss": 0.2522, + "step": 2084, + "teacher_loss": 0.23822768032550812 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.2498432695865631, + "learning_rate": 9.042937689749892e-06, + "loss": 0.2035, + "step": 2085, + "teacher_loss": 0.19834960997104645 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.5646398067474365, + "learning_rate": 9.04727483012867e-06, + "loss": 0.2589, + "step": 2086, + "teacher_loss": 0.2249540537595749 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.6695016026496887, + "learning_rate": 9.051611970507445e-06, + "loss": 0.2765, + "step": 2087, + "teacher_loss": 0.23279020190238953 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.20475949347019196, + "learning_rate": 9.055949110886223e-06, + "loss": 0.2163, + "step": 2088, + "teacher_loss": 0.21759286522865295 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.7687006592750549, + "learning_rate": 9.060286251264999e-06, + "loss": 0.2746, + "step": 2089, + "teacher_loss": 0.21971040964126587 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.5942267179489136, + "learning_rate": 9.064623391643777e-06, + "loss": 0.2956, + "step": 2090, + "teacher_loss": 0.2624244689941406 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.25686657428741455, + "learning_rate": 9.068960532022553e-06, + "loss": 0.2123, + "step": 2091, + "teacher_loss": 0.20739948749542236 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.39781856536865234, + "learning_rate": 9.07329767240133e-06, + "loss": 0.4133, + "step": 2092, + "teacher_loss": 0.4150695502758026 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.9219763278961182, + "learning_rate": 9.077634812780108e-06, + "loss": 0.2123, + "step": 2093, + "teacher_loss": 0.13346675038337708 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.4803623557090759, + "learning_rate": 9.081971953158885e-06, + "loss": 0.2548, + "step": 2094, + "teacher_loss": 0.2297501564025879 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.46216902136802673, + "learning_rate": 9.08630909353766e-06, + "loss": 0.2183, + "step": 2095, + "teacher_loss": 0.19125859439373016 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.3731536865234375, + "learning_rate": 9.090646233916437e-06, + "loss": 0.2482, + "step": 2096, + "teacher_loss": 0.23431020975112915 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.3863866925239563, + "learning_rate": 9.094983374295215e-06, + "loss": 0.2634, + "step": 2097, + "teacher_loss": 0.24969086050987244 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.8510243892669678, + "learning_rate": 9.099320514673993e-06, + "loss": 0.3111, + "step": 2098, + "teacher_loss": 0.2511082887649536 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.4815167188644409, + "learning_rate": 9.10365765505277e-06, + "loss": 0.253, + "step": 2099, + "teacher_loss": 0.22756710648536682 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.3032773733139038, + "learning_rate": 9.107994795431544e-06, + "loss": 0.2387, + "step": 2100, + "teacher_loss": 0.23156434297561646 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.8672282099723816, + "learning_rate": 9.112331935810322e-06, + "loss": 0.4178, + "step": 2101, + "teacher_loss": 0.3678891360759735 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.7124062776565552, + "learning_rate": 9.1166690761891e-06, + "loss": 0.2583, + "step": 2102, + "teacher_loss": 0.20782314240932465 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.4278835654258728, + "learning_rate": 9.121006216567877e-06, + "loss": 0.2713, + "step": 2103, + "teacher_loss": 0.2538614869117737 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.18787714838981628, + "learning_rate": 9.125343356946653e-06, + "loss": 0.1931, + "step": 2104, + "teacher_loss": 0.1937095820903778 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.4821511507034302, + "learning_rate": 9.12968049732543e-06, + "loss": 0.291, + "step": 2105, + "teacher_loss": 0.2697063088417053 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.4224971830844879, + "learning_rate": 9.134017637704207e-06, + "loss": 0.2987, + "step": 2106, + "teacher_loss": 0.2849319577217102 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.4728459119796753, + "learning_rate": 9.138354778082984e-06, + "loss": 0.3212, + "step": 2107, + "teacher_loss": 0.30432137846946716 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.4405709505081177, + "learning_rate": 9.142691918461762e-06, + "loss": 0.3043, + "step": 2108, + "teacher_loss": 0.2891288995742798 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.42375272512435913, + "learning_rate": 9.147029058840538e-06, + "loss": 0.2275, + "step": 2109, + "teacher_loss": 0.20569898188114166 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.1682019829750061, + "learning_rate": 9.151366199219316e-06, + "loss": 0.2193, + "step": 2110, + "teacher_loss": 0.2249731421470642 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.4222942292690277, + "learning_rate": 9.155703339598091e-06, + "loss": 0.2325, + "step": 2111, + "teacher_loss": 0.21136754751205444 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.4456612467765808, + "learning_rate": 9.160040479976869e-06, + "loss": 0.3435, + "step": 2112, + "teacher_loss": 0.332125723361969 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.48074495792388916, + "learning_rate": 9.164377620355645e-06, + "loss": 0.2321, + "step": 2113, + "teacher_loss": 0.20442691445350647 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.19290198385715485, + "learning_rate": 9.168714760734423e-06, + "loss": 0.2297, + "step": 2114, + "teacher_loss": 0.23380103707313538 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.4083178639411926, + "learning_rate": 9.1730519011132e-06, + "loss": 0.209, + "step": 2115, + "teacher_loss": 0.18685731291770935 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.47407251596450806, + "learning_rate": 9.177389041491976e-06, + "loss": 0.3237, + "step": 2116, + "teacher_loss": 0.30699652433395386 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.26519811153411865, + "learning_rate": 9.181726181870752e-06, + "loss": 0.1737, + "step": 2117, + "teacher_loss": 0.16349007189273834 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.37804514169692993, + "learning_rate": 9.18606332224953e-06, + "loss": 0.2048, + "step": 2118, + "teacher_loss": 0.18558034300804138 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 1.0541452169418335, + "learning_rate": 9.190400462628307e-06, + "loss": 0.2747, + "step": 2119, + "teacher_loss": 0.18807856738567352 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.5357826948165894, + "learning_rate": 9.194737603007085e-06, + "loss": 0.2789, + "step": 2120, + "teacher_loss": 0.25034138560295105 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.3532499670982361, + "learning_rate": 9.199074743385863e-06, + "loss": 0.296, + "step": 2121, + "teacher_loss": 0.2896440029144287 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.3230995237827301, + "learning_rate": 9.203411883764637e-06, + "loss": 0.2296, + "step": 2122, + "teacher_loss": 0.2192222774028778 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.9647735953330994, + "learning_rate": 9.207749024143414e-06, + "loss": 0.2812, + "step": 2123, + "teacher_loss": 0.20519641041755676 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.4289681911468506, + "learning_rate": 9.212086164522192e-06, + "loss": 0.2993, + "step": 2124, + "teacher_loss": 0.28487497568130493 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.4380072355270386, + "learning_rate": 9.21642330490097e-06, + "loss": 0.2316, + "step": 2125, + "teacher_loss": 0.20871500670909882 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.6278570890426636, + "learning_rate": 9.220760445279746e-06, + "loss": 0.2618, + "step": 2126, + "teacher_loss": 0.2211454212665558 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.41738349199295044, + "learning_rate": 9.225097585658522e-06, + "loss": 0.2602, + "step": 2127, + "teacher_loss": 0.24276286363601685 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.4858693480491638, + "learning_rate": 9.2294347260373e-06, + "loss": 0.2281, + "step": 2128, + "teacher_loss": 0.19951000809669495 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.4537910223007202, + "learning_rate": 9.233771866416077e-06, + "loss": 0.2073, + "step": 2129, + "teacher_loss": 0.17989768087863922 + }, + { + "compression_loss": 0.0, + "epoch": 0.38, + "label_loss": 0.23246467113494873, + "learning_rate": 9.238109006794854e-06, + "loss": 0.2513, + "step": 2130, + "teacher_loss": 0.25334692001342773 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.4051157236099243, + "learning_rate": 9.24244614717363e-06, + "loss": 0.2203, + "step": 2131, + "teacher_loss": 0.199751615524292 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.28404003381729126, + "learning_rate": 9.246783287552408e-06, + "loss": 0.2217, + "step": 2132, + "teacher_loss": 0.21472257375717163 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.17972180247306824, + "learning_rate": 9.251120427931184e-06, + "loss": 0.1692, + "step": 2133, + "teacher_loss": 0.16801142692565918 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.3178696036338806, + "learning_rate": 9.255457568309962e-06, + "loss": 0.2196, + "step": 2134, + "teacher_loss": 0.20871925354003906 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.4049859642982483, + "learning_rate": 9.259794708688737e-06, + "loss": 0.2821, + "step": 2135, + "teacher_loss": 0.268497496843338 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.7567098140716553, + "learning_rate": 9.264131849067515e-06, + "loss": 0.2024, + "step": 2136, + "teacher_loss": 0.14081138372421265 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.4760478138923645, + "learning_rate": 9.268468989446293e-06, + "loss": 0.5551, + "step": 2137, + "teacher_loss": 0.5638923645019531 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.3158319592475891, + "learning_rate": 9.272806129825069e-06, + "loss": 0.1546, + "step": 2138, + "teacher_loss": 0.1366778165102005 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.4463692307472229, + "learning_rate": 9.277143270203845e-06, + "loss": 0.2482, + "step": 2139, + "teacher_loss": 0.22623416781425476 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.31751370429992676, + "learning_rate": 9.281480410582622e-06, + "loss": 0.1651, + "step": 2140, + "teacher_loss": 0.1481410562992096 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.2751868963241577, + "learning_rate": 9.2858175509614e-06, + "loss": 0.2523, + "step": 2141, + "teacher_loss": 0.24979335069656372 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.2521328628063202, + "learning_rate": 9.290154691340177e-06, + "loss": 0.2432, + "step": 2142, + "teacher_loss": 0.24226175248622894 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.5611305236816406, + "learning_rate": 9.294491831718955e-06, + "loss": 0.2541, + "step": 2143, + "teacher_loss": 0.22002173960208893 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 1.1910628080368042, + "learning_rate": 9.29882897209773e-06, + "loss": 0.3634, + "step": 2144, + "teacher_loss": 0.27148547768592834 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.5105026364326477, + "learning_rate": 9.303166112476507e-06, + "loss": 0.2313, + "step": 2145, + "teacher_loss": 0.20025435090065002 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 1.1379374265670776, + "learning_rate": 9.307503252855285e-06, + "loss": 0.358, + "step": 2146, + "teacher_loss": 0.2713821530342102 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.6692630052566528, + "learning_rate": 9.311840393234062e-06, + "loss": 0.2763, + "step": 2147, + "teacher_loss": 0.23267696797847748 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.34099072217941284, + "learning_rate": 9.316177533612838e-06, + "loss": 0.2496, + "step": 2148, + "teacher_loss": 0.23947863280773163 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.5257954597473145, + "learning_rate": 9.320514673991614e-06, + "loss": 0.2321, + "step": 2149, + "teacher_loss": 0.1994883418083191 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.42799150943756104, + "learning_rate": 9.324851814370392e-06, + "loss": 0.2667, + "step": 2150, + "teacher_loss": 0.24874289333820343 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.41456642746925354, + "learning_rate": 9.32918895474917e-06, + "loss": 0.2828, + "step": 2151, + "teacher_loss": 0.2681078314781189 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.3473452627658844, + "learning_rate": 9.333526095127947e-06, + "loss": 0.2366, + "step": 2152, + "teacher_loss": 0.22431805729866028 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.5971699953079224, + "learning_rate": 9.337863235506723e-06, + "loss": 0.2208, + "step": 2153, + "teacher_loss": 0.17901864647865295 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.23845408856868744, + "learning_rate": 9.3422003758855e-06, + "loss": 0.1879, + "step": 2154, + "teacher_loss": 0.1823098361492157 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.3232029676437378, + "learning_rate": 9.346537516264276e-06, + "loss": 0.1809, + "step": 2155, + "teacher_loss": 0.1651291698217392 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.7645639181137085, + "learning_rate": 9.350874656643054e-06, + "loss": 0.3224, + "step": 2156, + "teacher_loss": 0.2732837200164795 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.30576765537261963, + "learning_rate": 9.35521179702183e-06, + "loss": 0.2027, + "step": 2157, + "teacher_loss": 0.1912125200033188 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.13728633522987366, + "learning_rate": 9.359548937400607e-06, + "loss": 0.1575, + "step": 2158, + "teacher_loss": 0.15970264375209808 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.38085559010505676, + "learning_rate": 9.363886077779385e-06, + "loss": 0.292, + "step": 2159, + "teacher_loss": 0.28213202953338623 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.15269820392131805, + "learning_rate": 9.368223218158161e-06, + "loss": 0.1957, + "step": 2160, + "teacher_loss": 0.20049670338630676 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.8320853114128113, + "learning_rate": 9.372560358536939e-06, + "loss": 0.3104, + "step": 2161, + "teacher_loss": 0.2524350881576538 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.4620468318462372, + "learning_rate": 9.376897498915715e-06, + "loss": 0.3223, + "step": 2162, + "teacher_loss": 0.3067595362663269 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.5319443345069885, + "learning_rate": 9.381234639294492e-06, + "loss": 0.3103, + "step": 2163, + "teacher_loss": 0.2856678366661072 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.6011531949043274, + "learning_rate": 9.38557177967327e-06, + "loss": 0.2303, + "step": 2164, + "teacher_loss": 0.189103901386261 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.7799822092056274, + "learning_rate": 9.389908920052046e-06, + "loss": 0.2928, + "step": 2165, + "teacher_loss": 0.2386912852525711 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.8331639766693115, + "learning_rate": 9.394246060430822e-06, + "loss": 0.2792, + "step": 2166, + "teacher_loss": 0.21768295764923096 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.8568414449691772, + "learning_rate": 9.3985832008096e-06, + "loss": 0.2889, + "step": 2167, + "teacher_loss": 0.225747212767601 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.5447222590446472, + "learning_rate": 9.402920341188377e-06, + "loss": 0.2724, + "step": 2168, + "teacher_loss": 0.24210651218891144 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.4856337308883667, + "learning_rate": 9.407257481567155e-06, + "loss": 0.3182, + "step": 2169, + "teacher_loss": 0.2995622754096985 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.6067987084388733, + "learning_rate": 9.41159462194593e-06, + "loss": 0.3291, + "step": 2170, + "teacher_loss": 0.29819437861442566 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.2784542143344879, + "learning_rate": 9.415931762324706e-06, + "loss": 0.2455, + "step": 2171, + "teacher_loss": 0.24187399446964264 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.5377089977264404, + "learning_rate": 9.420268902703484e-06, + "loss": 0.3191, + "step": 2172, + "teacher_loss": 0.2947670519351959 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.07679194957017899, + "learning_rate": 9.424606043082262e-06, + "loss": 0.2144, + "step": 2173, + "teacher_loss": 0.22972631454467773 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.1682363599538803, + "learning_rate": 9.42894318346104e-06, + "loss": 0.2643, + "step": 2174, + "teacher_loss": 0.27501484751701355 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.813317060470581, + "learning_rate": 9.433280323839815e-06, + "loss": 0.3818, + "step": 2175, + "teacher_loss": 0.3338812291622162 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.2971251606941223, + "learning_rate": 9.437617464218591e-06, + "loss": 0.2549, + "step": 2176, + "teacher_loss": 0.2501808702945709 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.7232875823974609, + "learning_rate": 9.441954604597369e-06, + "loss": 0.3495, + "step": 2177, + "teacher_loss": 0.30794596672058105 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.3615860044956207, + "learning_rate": 9.446291744976146e-06, + "loss": 0.2588, + "step": 2178, + "teacher_loss": 0.24740543961524963 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.5762007832527161, + "learning_rate": 9.450628885354922e-06, + "loss": 0.3085, + "step": 2179, + "teacher_loss": 0.2787438631057739 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.28633952140808105, + "learning_rate": 9.4549660257337e-06, + "loss": 0.257, + "step": 2180, + "teacher_loss": 0.2537161707878113 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.23427675664424896, + "learning_rate": 9.459303166112478e-06, + "loss": 0.1641, + "step": 2181, + "teacher_loss": 0.15630245208740234 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.3803035616874695, + "learning_rate": 9.463640306491253e-06, + "loss": 0.2538, + "step": 2182, + "teacher_loss": 0.2397821992635727 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.5253793001174927, + "learning_rate": 9.467977446870031e-06, + "loss": 0.3326, + "step": 2183, + "teacher_loss": 0.3111928403377533 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.6766204833984375, + "learning_rate": 9.472314587248807e-06, + "loss": 0.3705, + "step": 2184, + "teacher_loss": 0.3365001678466797 + }, + { + "compression_loss": 0.0, + "epoch": 0.39, + "label_loss": 0.36411163210868835, + "learning_rate": 9.476651727627585e-06, + "loss": 0.2854, + "step": 2185, + "teacher_loss": 0.2767032980918884 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.4328285753726959, + "learning_rate": 9.480988868006362e-06, + "loss": 0.2208, + "step": 2186, + "teacher_loss": 0.1972917765378952 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.4181402325630188, + "learning_rate": 9.485326008385138e-06, + "loss": 0.1978, + "step": 2187, + "teacher_loss": 0.17331382632255554 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.48330575227737427, + "learning_rate": 9.489663148763914e-06, + "loss": 0.2788, + "step": 2188, + "teacher_loss": 0.25611740350723267 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.2704477906227112, + "learning_rate": 9.494000289142692e-06, + "loss": 0.2156, + "step": 2189, + "teacher_loss": 0.209501713514328 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.8168417811393738, + "learning_rate": 9.49833742952147e-06, + "loss": 0.3547, + "step": 2190, + "teacher_loss": 0.3033197224140167 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.7836443781852722, + "learning_rate": 9.502674569900247e-06, + "loss": 0.2576, + "step": 2191, + "teacher_loss": 0.19912272691726685 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.3452082574367523, + "learning_rate": 9.507011710279023e-06, + "loss": 0.2214, + "step": 2192, + "teacher_loss": 0.20760349929332733 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.7372835874557495, + "learning_rate": 9.511348850657799e-06, + "loss": 0.2297, + "step": 2193, + "teacher_loss": 0.17330431938171387 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.23263344168663025, + "learning_rate": 9.515685991036576e-06, + "loss": 0.2469, + "step": 2194, + "teacher_loss": 0.24846260249614716 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.30855706334114075, + "learning_rate": 9.520023131415354e-06, + "loss": 0.1961, + "step": 2195, + "teacher_loss": 0.18356117606163025 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.3313453197479248, + "learning_rate": 9.524360271794132e-06, + "loss": 0.2574, + "step": 2196, + "teacher_loss": 0.24912987649440765 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.3201009929180145, + "learning_rate": 9.528697412172908e-06, + "loss": 0.2066, + "step": 2197, + "teacher_loss": 0.19396552443504333 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.5197339653968811, + "learning_rate": 9.533034552551684e-06, + "loss": 0.2215, + "step": 2198, + "teacher_loss": 0.18833063542842865 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.3336051106452942, + "learning_rate": 9.537371692930461e-06, + "loss": 0.3164, + "step": 2199, + "teacher_loss": 0.31451624631881714 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.2260984182357788, + "learning_rate": 9.541708833309239e-06, + "loss": 0.1653, + "step": 2200, + "teacher_loss": 0.15855246782302856 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.40012457966804504, + "learning_rate": 9.546045973688015e-06, + "loss": 0.2205, + "step": 2201, + "teacher_loss": 0.20049473643302917 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.4544990658760071, + "learning_rate": 9.550383114066792e-06, + "loss": 0.2793, + "step": 2202, + "teacher_loss": 0.25987643003463745 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.5559662580490112, + "learning_rate": 9.55472025444557e-06, + "loss": 0.2435, + "step": 2203, + "teacher_loss": 0.20883101224899292 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.4598582983016968, + "learning_rate": 9.559057394824346e-06, + "loss": 0.2208, + "step": 2204, + "teacher_loss": 0.1942415088415146 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.32681959867477417, + "learning_rate": 9.563394535203124e-06, + "loss": 0.2221, + "step": 2205, + "teacher_loss": 0.21041376888751984 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.49676692485809326, + "learning_rate": 9.5677316755819e-06, + "loss": 0.2572, + "step": 2206, + "teacher_loss": 0.23056307435035706 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.0893811360001564, + "learning_rate": 9.572068815960677e-06, + "loss": 0.1314, + "step": 2207, + "teacher_loss": 0.13603034615516663 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.14873647689819336, + "learning_rate": 9.576405956339455e-06, + "loss": 0.1928, + "step": 2208, + "teacher_loss": 0.1977020800113678 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.5860141515731812, + "learning_rate": 9.58074309671823e-06, + "loss": 0.3024, + "step": 2209, + "teacher_loss": 0.27087458968162537 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.20724236965179443, + "learning_rate": 9.585080237097007e-06, + "loss": 0.2204, + "step": 2210, + "teacher_loss": 0.22189565002918243 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.26069575548171997, + "learning_rate": 9.589417377475784e-06, + "loss": 0.2963, + "step": 2211, + "teacher_loss": 0.3002605438232422 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.5191426873207092, + "learning_rate": 9.593754517854562e-06, + "loss": 0.2987, + "step": 2212, + "teacher_loss": 0.2741590738296509 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.3210410475730896, + "learning_rate": 9.59809165823334e-06, + "loss": 0.3276, + "step": 2213, + "teacher_loss": 0.32833173871040344 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.4151713252067566, + "learning_rate": 9.602428798612115e-06, + "loss": 0.1755, + "step": 2214, + "teacher_loss": 0.14887070655822754 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.48839229345321655, + "learning_rate": 9.606765938990891e-06, + "loss": 0.2418, + "step": 2215, + "teacher_loss": 0.21436454355716705 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.45806974172592163, + "learning_rate": 9.611103079369669e-06, + "loss": 0.2144, + "step": 2216, + "teacher_loss": 0.18735596537590027 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.14502759277820587, + "learning_rate": 9.615440219748447e-06, + "loss": 0.222, + "step": 2217, + "teacher_loss": 0.23054593801498413 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.5173236727714539, + "learning_rate": 9.619777360127224e-06, + "loss": 0.2951, + "step": 2218, + "teacher_loss": 0.2704623341560364 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.5228928327560425, + "learning_rate": 9.624114500506e-06, + "loss": 0.2223, + "step": 2219, + "teacher_loss": 0.18892785906791687 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.5419129729270935, + "learning_rate": 9.628451640884776e-06, + "loss": 0.2425, + "step": 2220, + "teacher_loss": 0.2092212289571762 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.19005830585956573, + "learning_rate": 9.632788781263554e-06, + "loss": 0.1725, + "step": 2221, + "teacher_loss": 0.17049893736839294 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.5575596690177917, + "learning_rate": 9.637125921642331e-06, + "loss": 0.3116, + "step": 2222, + "teacher_loss": 0.2842558026313782 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.28845590353012085, + "learning_rate": 9.641463062021107e-06, + "loss": 0.1955, + "step": 2223, + "teacher_loss": 0.18520215153694153 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.24566662311553955, + "learning_rate": 9.645800202399885e-06, + "loss": 0.1872, + "step": 2224, + "teacher_loss": 0.18067467212677002 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.20837059617042542, + "learning_rate": 9.65013734277866e-06, + "loss": 0.1957, + "step": 2225, + "teacher_loss": 0.1943013072013855 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.6324210166931152, + "learning_rate": 9.654474483157438e-06, + "loss": 0.3344, + "step": 2226, + "teacher_loss": 0.30128300189971924 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.4252736568450928, + "learning_rate": 9.658811623536216e-06, + "loss": 0.2594, + "step": 2227, + "teacher_loss": 0.24094724655151367 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.47234320640563965, + "learning_rate": 9.663148763914992e-06, + "loss": 0.278, + "step": 2228, + "teacher_loss": 0.2563807964324951 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.1902233362197876, + "learning_rate": 9.66748590429377e-06, + "loss": 0.2068, + "step": 2229, + "teacher_loss": 0.20864424109458923 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.6113001108169556, + "learning_rate": 9.671823044672547e-06, + "loss": 0.2365, + "step": 2230, + "teacher_loss": 0.1949041187763214 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.504298746585846, + "learning_rate": 9.676160185051323e-06, + "loss": 0.2368, + "step": 2231, + "teacher_loss": 0.2070726901292801 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.7816634178161621, + "learning_rate": 9.680497325430099e-06, + "loss": 0.3517, + "step": 2232, + "teacher_loss": 0.30388006567955017 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.24324238300323486, + "learning_rate": 9.684834465808877e-06, + "loss": 0.2083, + "step": 2233, + "teacher_loss": 0.20443210005760193 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.2713974118232727, + "learning_rate": 9.689171606187654e-06, + "loss": 0.1972, + "step": 2234, + "teacher_loss": 0.1889331042766571 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.39800143241882324, + "learning_rate": 9.693508746566432e-06, + "loss": 0.2472, + "step": 2235, + "teacher_loss": 0.23048971593379974 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.32909536361694336, + "learning_rate": 9.697845886945206e-06, + "loss": 0.2349, + "step": 2236, + "teacher_loss": 0.22447431087493896 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.13109725713729858, + "learning_rate": 9.702183027323984e-06, + "loss": 0.1523, + "step": 2237, + "teacher_loss": 0.15464109182357788 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.9299656748771667, + "learning_rate": 9.706520167702761e-06, + "loss": 0.3251, + "step": 2238, + "teacher_loss": 0.2579048275947571 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.629909873008728, + "learning_rate": 9.710857308081539e-06, + "loss": 0.2106, + "step": 2239, + "teacher_loss": 0.16398456692695618 + }, + { + "compression_loss": 0.0, + "epoch": 0.4, + "label_loss": 0.8239361047744751, + "learning_rate": 9.715194448460317e-06, + "loss": 0.2567, + "step": 2240, + "teacher_loss": 0.1936299204826355 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.463945209980011, + "learning_rate": 9.719531588839093e-06, + "loss": 0.4089, + "step": 2241, + "teacher_loss": 0.40281015634536743 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.4012555480003357, + "learning_rate": 9.723868729217868e-06, + "loss": 0.2647, + "step": 2242, + "teacher_loss": 0.24955996870994568 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.197137251496315, + "learning_rate": 9.728205869596646e-06, + "loss": 0.1611, + "step": 2243, + "teacher_loss": 0.15712295472621918 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.6228808164596558, + "learning_rate": 9.732543009975424e-06, + "loss": 0.3091, + "step": 2244, + "teacher_loss": 0.274278849363327 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.5808538198471069, + "learning_rate": 9.7368801503542e-06, + "loss": 0.313, + "step": 2245, + "teacher_loss": 0.28319019079208374 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.4658641219139099, + "learning_rate": 9.741217290732977e-06, + "loss": 0.3236, + "step": 2246, + "teacher_loss": 0.3077549934387207 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.7658321857452393, + "learning_rate": 9.745554431111753e-06, + "loss": 0.2756, + "step": 2247, + "teacher_loss": 0.22112470865249634 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.9507397413253784, + "learning_rate": 9.74989157149053e-06, + "loss": 0.2563, + "step": 2248, + "teacher_loss": 0.1791505515575409 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.8397104740142822, + "learning_rate": 9.754228711869308e-06, + "loss": 0.2558, + "step": 2249, + "teacher_loss": 0.19092419743537903 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.45005905628204346, + "learning_rate": 9.758565852248084e-06, + "loss": 0.2413, + "step": 2250, + "teacher_loss": 0.21810144186019897 + }, + { + "epoch": 0.41, + "eval_exact_match": 79.5364238410596, + "eval_f1": 87.11622431744448, + "step": 2250 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.6465145945549011, + "learning_rate": 9.762902992626862e-06, + "loss": 0.2887, + "step": 2251, + "teacher_loss": 0.24898266792297363 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.833666205406189, + "learning_rate": 9.76724013300564e-06, + "loss": 0.3373, + "step": 2252, + "teacher_loss": 0.28210878372192383 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.7266898155212402, + "learning_rate": 9.771577273384416e-06, + "loss": 0.2551, + "step": 2253, + "teacher_loss": 0.20272672176361084 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.273129403591156, + "learning_rate": 9.775914413763191e-06, + "loss": 0.1758, + "step": 2254, + "teacher_loss": 0.16501866281032562 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.9625400304794312, + "learning_rate": 9.780251554141969e-06, + "loss": 0.3584, + "step": 2255, + "teacher_loss": 0.29128509759902954 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.6064280271530151, + "learning_rate": 9.784588694520747e-06, + "loss": 0.2198, + "step": 2256, + "teacher_loss": 0.17681677639484406 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.716954231262207, + "learning_rate": 9.788925834899524e-06, + "loss": 0.2849, + "step": 2257, + "teacher_loss": 0.2369404435157776 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.9100842475891113, + "learning_rate": 9.793262975278299e-06, + "loss": 0.3052, + "step": 2258, + "teacher_loss": 0.23797696828842163 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.29342323541641235, + "learning_rate": 9.797600115657076e-06, + "loss": 0.2229, + "step": 2259, + "teacher_loss": 0.21510908007621765 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.6890444755554199, + "learning_rate": 9.801937256035854e-06, + "loss": 0.2278, + "step": 2260, + "teacher_loss": 0.17649900913238525 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.6942050457000732, + "learning_rate": 9.806274396414631e-06, + "loss": 0.3363, + "step": 2261, + "teacher_loss": 0.2965131402015686 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.3415362238883972, + "learning_rate": 9.810611536793409e-06, + "loss": 0.1996, + "step": 2262, + "teacher_loss": 0.18387725949287415 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.5318688154220581, + "learning_rate": 9.814948677172185e-06, + "loss": 0.2522, + "step": 2263, + "teacher_loss": 0.22111092507839203 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.6075310707092285, + "learning_rate": 9.819285817550961e-06, + "loss": 0.3005, + "step": 2264, + "teacher_loss": 0.26643550395965576 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.3764263987541199, + "learning_rate": 9.823622957929739e-06, + "loss": 0.2415, + "step": 2265, + "teacher_loss": 0.22651365399360657 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.34776681661605835, + "learning_rate": 9.827960098308516e-06, + "loss": 0.1894, + "step": 2266, + "teacher_loss": 0.1718285083770752 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.292323499917984, + "learning_rate": 9.832297238687292e-06, + "loss": 0.2334, + "step": 2267, + "teacher_loss": 0.22689521312713623 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.34436583518981934, + "learning_rate": 9.83663437906607e-06, + "loss": 0.1959, + "step": 2268, + "teacher_loss": 0.17939028143882751 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.44654616713523865, + "learning_rate": 9.840971519444846e-06, + "loss": 0.2089, + "step": 2269, + "teacher_loss": 0.18249574303627014 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.5569769144058228, + "learning_rate": 9.845308659823623e-06, + "loss": 0.2609, + "step": 2270, + "teacher_loss": 0.2279849797487259 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.2435835897922516, + "learning_rate": 9.849645800202401e-06, + "loss": 0.2641, + "step": 2271, + "teacher_loss": 0.26636117696762085 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.5169653296470642, + "learning_rate": 9.853982940581177e-06, + "loss": 0.238, + "step": 2272, + "teacher_loss": 0.20704606175422668 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.769324779510498, + "learning_rate": 9.858320080959954e-06, + "loss": 0.3828, + "step": 2273, + "teacher_loss": 0.33986395597457886 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.24644087255001068, + "learning_rate": 9.86265722133873e-06, + "loss": 0.2132, + "step": 2274, + "teacher_loss": 0.20946389436721802 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.7297289371490479, + "learning_rate": 9.866994361717508e-06, + "loss": 0.3247, + "step": 2275, + "teacher_loss": 0.2797221541404724 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.44911664724349976, + "learning_rate": 9.871331502096284e-06, + "loss": 0.2705, + "step": 2276, + "teacher_loss": 0.25062471628189087 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.5729313492774963, + "learning_rate": 9.875668642475062e-06, + "loss": 0.4894, + "step": 2277, + "teacher_loss": 0.48017197847366333 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.2987278401851654, + "learning_rate": 9.880005782853839e-06, + "loss": 0.1885, + "step": 2278, + "teacher_loss": 0.17620965838432312 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.3366811275482178, + "learning_rate": 9.884342923232617e-06, + "loss": 0.2544, + "step": 2279, + "teacher_loss": 0.24530529975891113 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.6210167407989502, + "learning_rate": 9.888680063611391e-06, + "loss": 0.2479, + "step": 2280, + "teacher_loss": 0.2064325362443924 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.48328113555908203, + "learning_rate": 9.893017203990169e-06, + "loss": 0.3367, + "step": 2281, + "teacher_loss": 0.32045167684555054 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.47786957025527954, + "learning_rate": 9.897354344368946e-06, + "loss": 0.2611, + "step": 2282, + "teacher_loss": 0.237041175365448 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.3577849268913269, + "learning_rate": 9.901691484747724e-06, + "loss": 0.2212, + "step": 2283, + "teacher_loss": 0.2059764266014099 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.20109067857265472, + "learning_rate": 9.906028625126501e-06, + "loss": 0.2015, + "step": 2284, + "teacher_loss": 0.20152871310710907 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.20129795372486115, + "learning_rate": 9.910365765505276e-06, + "loss": 0.2715, + "step": 2285, + "teacher_loss": 0.2792593836784363 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.302428275346756, + "learning_rate": 9.914702905884053e-06, + "loss": 0.1749, + "step": 2286, + "teacher_loss": 0.16077542304992676 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.48621752858161926, + "learning_rate": 9.919040046262831e-06, + "loss": 0.2539, + "step": 2287, + "teacher_loss": 0.22805029153823853 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.5985076427459717, + "learning_rate": 9.923377186641609e-06, + "loss": 0.2641, + "step": 2288, + "teacher_loss": 0.22693976759910583 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.49067115783691406, + "learning_rate": 9.927714327020385e-06, + "loss": 0.2899, + "step": 2289, + "teacher_loss": 0.2675774097442627 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.6574618816375732, + "learning_rate": 9.932051467399162e-06, + "loss": 0.2771, + "step": 2290, + "teacher_loss": 0.23481649160385132 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.5394272804260254, + "learning_rate": 9.936388607777938e-06, + "loss": 0.2282, + "step": 2291, + "teacher_loss": 0.1935638040304184 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.2152406871318817, + "learning_rate": 9.940725748156716e-06, + "loss": 0.1851, + "step": 2292, + "teacher_loss": 0.18172720074653625 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.34301191568374634, + "learning_rate": 9.945062888535493e-06, + "loss": 0.2145, + "step": 2293, + "teacher_loss": 0.20020702481269836 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.3301433324813843, + "learning_rate": 9.94940002891427e-06, + "loss": 0.2282, + "step": 2294, + "teacher_loss": 0.21684956550598145 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.45296117663383484, + "learning_rate": 9.953737169293047e-06, + "loss": 0.289, + "step": 2295, + "teacher_loss": 0.27076005935668945 + }, + { + "compression_loss": 0.0, + "epoch": 0.41, + "label_loss": 0.19504909217357635, + "learning_rate": 9.958074309671823e-06, + "loss": 0.1652, + "step": 2296, + "teacher_loss": 0.16192524135112762 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.23968197405338287, + "learning_rate": 9.9624114500506e-06, + "loss": 0.1992, + "step": 2297, + "teacher_loss": 0.19464752078056335 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.24263378977775574, + "learning_rate": 9.966748590429376e-06, + "loss": 0.1575, + "step": 2298, + "teacher_loss": 0.14803577959537506 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.2428988814353943, + "learning_rate": 9.971085730808154e-06, + "loss": 0.2096, + "step": 2299, + "teacher_loss": 0.20585303008556366 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.15833738446235657, + "learning_rate": 9.975422871186932e-06, + "loss": 0.1511, + "step": 2300, + "teacher_loss": 0.15024270117282867 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.5531819462776184, + "learning_rate": 9.97976001156571e-06, + "loss": 0.2749, + "step": 2301, + "teacher_loss": 0.2439463883638382 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.375693678855896, + "learning_rate": 9.984097151944483e-06, + "loss": 0.1803, + "step": 2302, + "teacher_loss": 0.15855905413627625 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.4576565623283386, + "learning_rate": 9.988434292323261e-06, + "loss": 0.2523, + "step": 2303, + "teacher_loss": 0.22948497533798218 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.43955957889556885, + "learning_rate": 9.992771432702039e-06, + "loss": 0.211, + "step": 2304, + "teacher_loss": 0.18558531999588013 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.553405225276947, + "learning_rate": 9.997108573080816e-06, + "loss": 0.2798, + "step": 2305, + "teacher_loss": 0.2493765652179718 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.47034305334091187, + "learning_rate": 1.0001445713459594e-05, + "loss": 0.2776, + "step": 2306, + "teacher_loss": 0.2562023997306824 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.5617996454238892, + "learning_rate": 1.0005782853838368e-05, + "loss": 0.3638, + "step": 2307, + "teacher_loss": 0.3418330252170563 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.6733913421630859, + "learning_rate": 1.0010119994217146e-05, + "loss": 0.2621, + "step": 2308, + "teacher_loss": 0.21637627482414246 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.8181602954864502, + "learning_rate": 1.0014457134595923e-05, + "loss": 0.3101, + "step": 2309, + "teacher_loss": 0.25363266468048096 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.20042556524276733, + "learning_rate": 1.0018794274974701e-05, + "loss": 0.228, + "step": 2310, + "teacher_loss": 0.23111391067504883 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.5272350311279297, + "learning_rate": 1.0023131415353477e-05, + "loss": 0.2672, + "step": 2311, + "teacher_loss": 0.23825430870056152 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.5850472450256348, + "learning_rate": 1.0027468555732255e-05, + "loss": 0.2469, + "step": 2312, + "teacher_loss": 0.2092863917350769 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.3368449807167053, + "learning_rate": 1.003180569611103e-05, + "loss": 0.2562, + "step": 2313, + "teacher_loss": 0.2472047507762909 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.22068515419960022, + "learning_rate": 1.0036142836489808e-05, + "loss": 0.1698, + "step": 2314, + "teacher_loss": 0.1641535460948944 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.2691343128681183, + "learning_rate": 1.0040479976868586e-05, + "loss": 0.1812, + "step": 2315, + "teacher_loss": 0.17144426703453064 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.18616539239883423, + "learning_rate": 1.0044817117247362e-05, + "loss": 0.1978, + "step": 2316, + "teacher_loss": 0.19905070960521698 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.3410555124282837, + "learning_rate": 1.004915425762614e-05, + "loss": 0.1889, + "step": 2317, + "teacher_loss": 0.1720198094844818 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.5155552625656128, + "learning_rate": 1.0053491398004915e-05, + "loss": 0.3477, + "step": 2318, + "teacher_loss": 0.3290276825428009 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.39196711778640747, + "learning_rate": 1.0057828538383693e-05, + "loss": 0.3081, + "step": 2319, + "teacher_loss": 0.29880768060684204 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.6837252378463745, + "learning_rate": 1.0062165678762469e-05, + "loss": 0.2913, + "step": 2320, + "teacher_loss": 0.2476527988910675 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.6306057572364807, + "learning_rate": 1.0066502819141246e-05, + "loss": 0.3729, + "step": 2321, + "teacher_loss": 0.344268262386322 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.31750744581222534, + "learning_rate": 1.0070839959520024e-05, + "loss": 0.2029, + "step": 2322, + "teacher_loss": 0.19020652770996094 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.28623390197753906, + "learning_rate": 1.00751770998988e-05, + "loss": 0.2095, + "step": 2323, + "teacher_loss": 0.2009831964969635 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.6174214482307434, + "learning_rate": 1.0079514240277578e-05, + "loss": 0.26, + "step": 2324, + "teacher_loss": 0.22029903531074524 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.07627439498901367, + "learning_rate": 1.0083851380656353e-05, + "loss": 0.1481, + "step": 2325, + "teacher_loss": 0.15602877736091614 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.3895377814769745, + "learning_rate": 1.0088188521035131e-05, + "loss": 0.2325, + "step": 2326, + "teacher_loss": 0.21507856249809265 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.4977247714996338, + "learning_rate": 1.0092525661413909e-05, + "loss": 0.2148, + "step": 2327, + "teacher_loss": 0.18338578939437866 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.6924000382423401, + "learning_rate": 1.0096862801792686e-05, + "loss": 0.3048, + "step": 2328, + "teacher_loss": 0.2617051601409912 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.7958387136459351, + "learning_rate": 1.010119994217146e-05, + "loss": 0.3015, + "step": 2329, + "teacher_loss": 0.24660885334014893 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.51271653175354, + "learning_rate": 1.0105537082550238e-05, + "loss": 0.2054, + "step": 2330, + "teacher_loss": 0.17126289010047913 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.3208063840866089, + "learning_rate": 1.0109874222929016e-05, + "loss": 0.207, + "step": 2331, + "teacher_loss": 0.19439736008644104 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.6714671850204468, + "learning_rate": 1.0114211363307793e-05, + "loss": 0.3493, + "step": 2332, + "teacher_loss": 0.3135136067867279 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.3434569835662842, + "learning_rate": 1.011854850368657e-05, + "loss": 0.2983, + "step": 2333, + "teacher_loss": 0.2933364510536194 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.34067946672439575, + "learning_rate": 1.0122885644065345e-05, + "loss": 0.1844, + "step": 2334, + "teacher_loss": 0.16701556742191315 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.5197765827178955, + "learning_rate": 1.0127222784444123e-05, + "loss": 0.2599, + "step": 2335, + "teacher_loss": 0.23100724816322327 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.42884141206741333, + "learning_rate": 1.01315599248229e-05, + "loss": 0.2935, + "step": 2336, + "teacher_loss": 0.2784738540649414 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.803558349609375, + "learning_rate": 1.0135897065201678e-05, + "loss": 0.2865, + "step": 2337, + "teacher_loss": 0.22906732559204102 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.7363027334213257, + "learning_rate": 1.0140234205580454e-05, + "loss": 0.2867, + "step": 2338, + "teacher_loss": 0.23679344356060028 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.1508190929889679, + "learning_rate": 1.0144571345959232e-05, + "loss": 0.1517, + "step": 2339, + "teacher_loss": 0.15184976160526276 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.3046949803829193, + "learning_rate": 1.0148908486338008e-05, + "loss": 0.1679, + "step": 2340, + "teacher_loss": 0.1527009904384613 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.15535122156143188, + "learning_rate": 1.0153245626716785e-05, + "loss": 0.1496, + "step": 2341, + "teacher_loss": 0.14897367358207703 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.27468621730804443, + "learning_rate": 1.0157582767095561e-05, + "loss": 0.1957, + "step": 2342, + "teacher_loss": 0.1869387924671173 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.3823353350162506, + "learning_rate": 1.0161919907474339e-05, + "loss": 0.2366, + "step": 2343, + "teacher_loss": 0.22037062048912048 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.33034995198249817, + "learning_rate": 1.0166257047853116e-05, + "loss": 0.1946, + "step": 2344, + "teacher_loss": 0.17950767278671265 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.8185216188430786, + "learning_rate": 1.0170594188231892e-05, + "loss": 0.3126, + "step": 2345, + "teacher_loss": 0.25640130043029785 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 1.0245083570480347, + "learning_rate": 1.017493132861067e-05, + "loss": 0.3125, + "step": 2346, + "teacher_loss": 0.23335415124893188 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.3670728802680969, + "learning_rate": 1.0179268468989446e-05, + "loss": 0.2592, + "step": 2347, + "teacher_loss": 0.24723802506923676 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.5001082420349121, + "learning_rate": 1.0183605609368224e-05, + "loss": 0.2641, + "step": 2348, + "teacher_loss": 0.23785510659217834 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.7036440372467041, + "learning_rate": 1.0187942749747001e-05, + "loss": 0.2698, + "step": 2349, + "teacher_loss": 0.22159487009048462 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.3120863437652588, + "learning_rate": 1.0192279890125779e-05, + "loss": 0.3208, + "step": 2350, + "teacher_loss": 0.32175183296203613 + }, + { + "compression_loss": 0.0, + "epoch": 0.42, + "label_loss": 0.319937139749527, + "learning_rate": 1.0196617030504553e-05, + "loss": 0.3222, + "step": 2351, + "teacher_loss": 0.32242417335510254 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.42182475328445435, + "learning_rate": 1.020095417088333e-05, + "loss": 0.1954, + "step": 2352, + "teacher_loss": 0.17026779055595398 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.9801952838897705, + "learning_rate": 1.0205291311262108e-05, + "loss": 0.3076, + "step": 2353, + "teacher_loss": 0.2328852415084839 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.16557687520980835, + "learning_rate": 1.0209628451640886e-05, + "loss": 0.1648, + "step": 2354, + "teacher_loss": 0.1647290587425232 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.5737104415893555, + "learning_rate": 1.0213965592019662e-05, + "loss": 0.2421, + "step": 2355, + "teacher_loss": 0.20520544052124023 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.25342780351638794, + "learning_rate": 1.0218302732398438e-05, + "loss": 0.2138, + "step": 2356, + "teacher_loss": 0.2093610316514969 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.5346795916557312, + "learning_rate": 1.0222639872777215e-05, + "loss": 0.573, + "step": 2357, + "teacher_loss": 0.5773087739944458 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.307625412940979, + "learning_rate": 1.0226977013155993e-05, + "loss": 0.2187, + "step": 2358, + "teacher_loss": 0.20886112749576569 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.6142769455909729, + "learning_rate": 1.023131415353477e-05, + "loss": 0.4483, + "step": 2359, + "teacher_loss": 0.429845929145813 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.629026472568512, + "learning_rate": 1.0235651293913547e-05, + "loss": 0.2345, + "step": 2360, + "teacher_loss": 0.19061236083507538 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.45466238260269165, + "learning_rate": 1.0239988434292324e-05, + "loss": 0.2111, + "step": 2361, + "teacher_loss": 0.18409281969070435 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.3808279037475586, + "learning_rate": 1.02443255746711e-05, + "loss": 0.2319, + "step": 2362, + "teacher_loss": 0.21537208557128906 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.44422000646591187, + "learning_rate": 1.0248662715049878e-05, + "loss": 0.2135, + "step": 2363, + "teacher_loss": 0.18791040778160095 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.2515581250190735, + "learning_rate": 1.0252999855428654e-05, + "loss": 0.1596, + "step": 2364, + "teacher_loss": 0.1494198888540268 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.2316003441810608, + "learning_rate": 1.0257336995807431e-05, + "loss": 0.2103, + "step": 2365, + "teacher_loss": 0.20790261030197144 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.2647305727005005, + "learning_rate": 1.0261674136186209e-05, + "loss": 0.2122, + "step": 2366, + "teacher_loss": 0.20640867948532104 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.7372491359710693, + "learning_rate": 1.0266011276564985e-05, + "loss": 0.2394, + "step": 2367, + "teacher_loss": 0.18407300114631653 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.41440093517303467, + "learning_rate": 1.0270348416943762e-05, + "loss": 0.2798, + "step": 2368, + "teacher_loss": 0.2648549973964691 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.17522266507148743, + "learning_rate": 1.0274685557322538e-05, + "loss": 0.1932, + "step": 2369, + "teacher_loss": 0.19521096348762512 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 1.035237431526184, + "learning_rate": 1.0279022697701316e-05, + "loss": 0.3951, + "step": 2370, + "teacher_loss": 0.3239811062812805 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.45606887340545654, + "learning_rate": 1.0283359838080094e-05, + "loss": 0.3314, + "step": 2371, + "teacher_loss": 0.3175421357154846 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.47557497024536133, + "learning_rate": 1.028769697845887e-05, + "loss": 0.2263, + "step": 2372, + "teacher_loss": 0.19856590032577515 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.16883325576782227, + "learning_rate": 1.0292034118837645e-05, + "loss": 0.2022, + "step": 2373, + "teacher_loss": 0.2058539092540741 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.42074912786483765, + "learning_rate": 1.0296371259216423e-05, + "loss": 0.4154, + "step": 2374, + "teacher_loss": 0.41480326652526855 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.5496131181716919, + "learning_rate": 1.03007083995952e-05, + "loss": 0.2345, + "step": 2375, + "teacher_loss": 0.1994635909795761 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.1385919153690338, + "learning_rate": 1.0305045539973978e-05, + "loss": 0.232, + "step": 2376, + "teacher_loss": 0.24233722686767578 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.4555704593658447, + "learning_rate": 1.0309382680352754e-05, + "loss": 0.2456, + "step": 2377, + "teacher_loss": 0.22230252623558044 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.5253335237503052, + "learning_rate": 1.031371982073153e-05, + "loss": 0.2521, + "step": 2378, + "teacher_loss": 0.22170299291610718 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.6658607721328735, + "learning_rate": 1.0318056961110308e-05, + "loss": 0.3161, + "step": 2379, + "teacher_loss": 0.2772500514984131 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.3261749744415283, + "learning_rate": 1.0322394101489085e-05, + "loss": 0.2044, + "step": 2380, + "teacher_loss": 0.1908426135778427 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.29901862144470215, + "learning_rate": 1.0326731241867863e-05, + "loss": 0.2229, + "step": 2381, + "teacher_loss": 0.2143876701593399 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.6709582805633545, + "learning_rate": 1.0331068382246639e-05, + "loss": 0.2748, + "step": 2382, + "teacher_loss": 0.2308310568332672 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.9853832721710205, + "learning_rate": 1.0335405522625415e-05, + "loss": 0.2763, + "step": 2383, + "teacher_loss": 0.19752341508865356 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.4647062420845032, + "learning_rate": 1.0339742663004193e-05, + "loss": 0.255, + "step": 2384, + "teacher_loss": 0.23172365128993988 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 1.1897125244140625, + "learning_rate": 1.034407980338297e-05, + "loss": 0.6817, + "step": 2385, + "teacher_loss": 0.6252492666244507 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.7053435444831848, + "learning_rate": 1.0348416943761746e-05, + "loss": 0.2584, + "step": 2386, + "teacher_loss": 0.20875926315784454 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.2810845971107483, + "learning_rate": 1.0352754084140524e-05, + "loss": 0.1777, + "step": 2387, + "teacher_loss": 0.1661744862794876 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.7917479872703552, + "learning_rate": 1.0357091224519301e-05, + "loss": 0.2732, + "step": 2388, + "teacher_loss": 0.21561607718467712 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.3023416996002197, + "learning_rate": 1.0361428364898077e-05, + "loss": 0.1887, + "step": 2389, + "teacher_loss": 0.17606337368488312 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.22276663780212402, + "learning_rate": 1.0365765505276855e-05, + "loss": 0.1666, + "step": 2390, + "teacher_loss": 0.16031357645988464 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.3278570771217346, + "learning_rate": 1.037010264565563e-05, + "loss": 0.1639, + "step": 2391, + "teacher_loss": 0.14565476775169373 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 1.2403373718261719, + "learning_rate": 1.0374439786034408e-05, + "loss": 0.3944, + "step": 2392, + "teacher_loss": 0.30045682191848755 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.3178249001502991, + "learning_rate": 1.0378776926413186e-05, + "loss": 0.1812, + "step": 2393, + "teacher_loss": 0.1659734845161438 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.16195733845233917, + "learning_rate": 1.0383114066791962e-05, + "loss": 0.2266, + "step": 2394, + "teacher_loss": 0.23380282521247864 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.3827659487724304, + "learning_rate": 1.0387451207170738e-05, + "loss": 0.2067, + "step": 2395, + "teacher_loss": 0.1870955228805542 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.5561103224754333, + "learning_rate": 1.0391788347549516e-05, + "loss": 0.2373, + "step": 2396, + "teacher_loss": 0.20193053781986237 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.19197705388069153, + "learning_rate": 1.0396125487928293e-05, + "loss": 0.2192, + "step": 2397, + "teacher_loss": 0.22227761149406433 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.8447026014328003, + "learning_rate": 1.040046262830707e-05, + "loss": 0.2801, + "step": 2398, + "teacher_loss": 0.21741461753845215 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.24189075827598572, + "learning_rate": 1.0404799768685847e-05, + "loss": 0.2338, + "step": 2399, + "teacher_loss": 0.23292356729507446 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.40745067596435547, + "learning_rate": 1.0409136909064623e-05, + "loss": 0.1841, + "step": 2400, + "teacher_loss": 0.159266859292984 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.3060387372970581, + "learning_rate": 1.04134740494434e-05, + "loss": 0.1903, + "step": 2401, + "teacher_loss": 0.17744530737400055 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.2830800414085388, + "learning_rate": 1.0417811189822178e-05, + "loss": 0.2044, + "step": 2402, + "teacher_loss": 0.19561263918876648 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.34571605920791626, + "learning_rate": 1.0422148330200956e-05, + "loss": 0.3041, + "step": 2403, + "teacher_loss": 0.2994650602340698 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 1.0207722187042236, + "learning_rate": 1.0426485470579731e-05, + "loss": 0.3004, + "step": 2404, + "teacher_loss": 0.2203269749879837 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.49686408042907715, + "learning_rate": 1.0430822610958507e-05, + "loss": 0.2912, + "step": 2405, + "teacher_loss": 0.2683042883872986 + }, + { + "compression_loss": 0.0, + "epoch": 0.43, + "label_loss": 0.633056640625, + "learning_rate": 1.0435159751337285e-05, + "loss": 0.2433, + "step": 2406, + "teacher_loss": 0.19998416304588318 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.9559853076934814, + "learning_rate": 1.0439496891716063e-05, + "loss": 0.2451, + "step": 2407, + "teacher_loss": 0.16610883176326752 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.28321364521980286, + "learning_rate": 1.0443834032094839e-05, + "loss": 0.19, + "step": 2408, + "teacher_loss": 0.17967134714126587 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.2944931387901306, + "learning_rate": 1.0448171172473616e-05, + "loss": 0.2064, + "step": 2409, + "teacher_loss": 0.19664905965328217 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.3292686641216278, + "learning_rate": 1.0452508312852394e-05, + "loss": 0.2291, + "step": 2410, + "teacher_loss": 0.21791525185108185 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.3860967755317688, + "learning_rate": 1.045684545323117e-05, + "loss": 0.2801, + "step": 2411, + "teacher_loss": 0.26829662919044495 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.3558676242828369, + "learning_rate": 1.0461182593609947e-05, + "loss": 0.2191, + "step": 2412, + "teacher_loss": 0.20393508672714233 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.4674742817878723, + "learning_rate": 1.0465519733988723e-05, + "loss": 0.1898, + "step": 2413, + "teacher_loss": 0.15898266434669495 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.9602545499801636, + "learning_rate": 1.0469856874367501e-05, + "loss": 0.3149, + "step": 2414, + "teacher_loss": 0.243166983127594 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.5890560150146484, + "learning_rate": 1.0474194014746278e-05, + "loss": 0.2299, + "step": 2415, + "teacher_loss": 0.19004853069782257 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.2755882143974304, + "learning_rate": 1.0478531155125054e-05, + "loss": 0.2126, + "step": 2416, + "teacher_loss": 0.20558002591133118 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.39748287200927734, + "learning_rate": 1.048286829550383e-05, + "loss": 0.2097, + "step": 2417, + "teacher_loss": 0.1887979656457901 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.5701079964637756, + "learning_rate": 1.0487205435882608e-05, + "loss": 0.2016, + "step": 2418, + "teacher_loss": 0.1606331169605255 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.3085557818412781, + "learning_rate": 1.0491542576261386e-05, + "loss": 0.2555, + "step": 2419, + "teacher_loss": 0.249592125415802 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.39123088121414185, + "learning_rate": 1.0495879716640163e-05, + "loss": 0.3359, + "step": 2420, + "teacher_loss": 0.32972100377082825 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.349899023771286, + "learning_rate": 1.0500216857018937e-05, + "loss": 0.1667, + "step": 2421, + "teacher_loss": 0.14635542035102844 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.3001956343650818, + "learning_rate": 1.0504553997397715e-05, + "loss": 0.1906, + "step": 2422, + "teacher_loss": 0.178460955619812 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.7973374128341675, + "learning_rate": 1.0508891137776493e-05, + "loss": 0.275, + "step": 2423, + "teacher_loss": 0.21696211397647858 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.4588952660560608, + "learning_rate": 1.051322827815527e-05, + "loss": 0.2801, + "step": 2424, + "teacher_loss": 0.2601942718029022 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 1.0183534622192383, + "learning_rate": 1.0517565418534048e-05, + "loss": 0.2498, + "step": 2425, + "teacher_loss": 0.16445884108543396 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.8354297280311584, + "learning_rate": 1.0521902558912824e-05, + "loss": 0.2826, + "step": 2426, + "teacher_loss": 0.22111913561820984 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.2762755751609802, + "learning_rate": 1.05262396992916e-05, + "loss": 0.3158, + "step": 2427, + "teacher_loss": 0.32019728422164917 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.22240827977657318, + "learning_rate": 1.0530576839670377e-05, + "loss": 0.2396, + "step": 2428, + "teacher_loss": 0.2414652407169342 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.2177400290966034, + "learning_rate": 1.0534913980049155e-05, + "loss": 0.1792, + "step": 2429, + "teacher_loss": 0.17497298121452332 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.5535277128219604, + "learning_rate": 1.0539251120427931e-05, + "loss": 0.3143, + "step": 2430, + "teacher_loss": 0.28777188062667847 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.41109567880630493, + "learning_rate": 1.0543588260806709e-05, + "loss": 0.2533, + "step": 2431, + "teacher_loss": 0.23573818802833557 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.357093870639801, + "learning_rate": 1.0547925401185485e-05, + "loss": 0.2618, + "step": 2432, + "teacher_loss": 0.25119879841804504 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.5321446657180786, + "learning_rate": 1.0552262541564262e-05, + "loss": 0.2393, + "step": 2433, + "teacher_loss": 0.20674863457679749 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.48428842425346375, + "learning_rate": 1.055659968194304e-05, + "loss": 0.2548, + "step": 2434, + "teacher_loss": 0.22925138473510742 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.5370008945465088, + "learning_rate": 1.0560936822321816e-05, + "loss": 0.2635, + "step": 2435, + "teacher_loss": 0.23310929536819458 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.18650224804878235, + "learning_rate": 1.0565273962700593e-05, + "loss": 0.1878, + "step": 2436, + "teacher_loss": 0.1879117339849472 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.5419920682907104, + "learning_rate": 1.0569611103079371e-05, + "loss": 0.2414, + "step": 2437, + "teacher_loss": 0.20802277326583862 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.33632010221481323, + "learning_rate": 1.0573948243458147e-05, + "loss": 0.1999, + "step": 2438, + "teacher_loss": 0.18478095531463623 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.21605004370212555, + "learning_rate": 1.0578285383836923e-05, + "loss": 0.2109, + "step": 2439, + "teacher_loss": 0.2103685438632965 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.31071263551712036, + "learning_rate": 1.05826225242157e-05, + "loss": 0.2232, + "step": 2440, + "teacher_loss": 0.2135113924741745 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.30834901332855225, + "learning_rate": 1.0586959664594478e-05, + "loss": 0.2553, + "step": 2441, + "teacher_loss": 0.24945297837257385 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.45771241188049316, + "learning_rate": 1.0591296804973256e-05, + "loss": 0.2743, + "step": 2442, + "teacher_loss": 0.25395581126213074 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.4475412964820862, + "learning_rate": 1.059563394535203e-05, + "loss": 0.2855, + "step": 2443, + "teacher_loss": 0.2674804627895355 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.47804325819015503, + "learning_rate": 1.0599971085730808e-05, + "loss": 0.2948, + "step": 2444, + "teacher_loss": 0.27439579367637634 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.1676841676235199, + "learning_rate": 1.0604308226109585e-05, + "loss": 0.189, + "step": 2445, + "teacher_loss": 0.1913500279188156 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.6251732707023621, + "learning_rate": 1.0608645366488363e-05, + "loss": 0.266, + "step": 2446, + "teacher_loss": 0.22612860798835754 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.5512125492095947, + "learning_rate": 1.061298250686714e-05, + "loss": 0.2804, + "step": 2447, + "teacher_loss": 0.2502940595149994 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.6812999844551086, + "learning_rate": 1.0617319647245916e-05, + "loss": 0.254, + "step": 2448, + "teacher_loss": 0.20654049515724182 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.45413699746131897, + "learning_rate": 1.0621656787624692e-05, + "loss": 0.2726, + "step": 2449, + "teacher_loss": 0.2523787021636963 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.7652422189712524, + "learning_rate": 1.062599392800347e-05, + "loss": 0.2743, + "step": 2450, + "teacher_loss": 0.2197152078151703 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.8214482069015503, + "learning_rate": 1.0630331068382247e-05, + "loss": 0.4236, + "step": 2451, + "teacher_loss": 0.3794212341308594 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.7018332481384277, + "learning_rate": 1.0634668208761023e-05, + "loss": 0.2579, + "step": 2452, + "teacher_loss": 0.20857229828834534 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.258870005607605, + "learning_rate": 1.0639005349139801e-05, + "loss": 0.2538, + "step": 2453, + "teacher_loss": 0.2532517910003662 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.723617434501648, + "learning_rate": 1.0643342489518577e-05, + "loss": 0.2927, + "step": 2454, + "teacher_loss": 0.24477502703666687 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.32321298122406006, + "learning_rate": 1.0647679629897355e-05, + "loss": 0.2652, + "step": 2455, + "teacher_loss": 0.2587481737136841 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.47916722297668457, + "learning_rate": 1.0652016770276132e-05, + "loss": 0.3125, + "step": 2456, + "teacher_loss": 0.2940249443054199 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.3623248338699341, + "learning_rate": 1.0656353910654908e-05, + "loss": 0.2185, + "step": 2457, + "teacher_loss": 0.20257121324539185 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.49092942476272583, + "learning_rate": 1.0660691051033686e-05, + "loss": 0.2399, + "step": 2458, + "teacher_loss": 0.21204935014247894 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.5602243542671204, + "learning_rate": 1.0665028191412463e-05, + "loss": 0.2323, + "step": 2459, + "teacher_loss": 0.19585971534252167 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.7578895092010498, + "learning_rate": 1.066936533179124e-05, + "loss": 0.3163, + "step": 2460, + "teacher_loss": 0.2672047019004822 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.2102113962173462, + "learning_rate": 1.0673702472170015e-05, + "loss": 0.1865, + "step": 2461, + "teacher_loss": 0.183834508061409 + }, + { + "compression_loss": 0.0, + "epoch": 0.44, + "label_loss": 0.28813832998275757, + "learning_rate": 1.0678039612548793e-05, + "loss": 0.2235, + "step": 2462, + "teacher_loss": 0.2163233608007431 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.6058992147445679, + "learning_rate": 1.068237675292757e-05, + "loss": 0.2699, + "step": 2463, + "teacher_loss": 0.232540100812912 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.6213823556900024, + "learning_rate": 1.0686713893306348e-05, + "loss": 0.2252, + "step": 2464, + "teacher_loss": 0.18116098642349243 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.46115273237228394, + "learning_rate": 1.0691051033685124e-05, + "loss": 0.2202, + "step": 2465, + "teacher_loss": 0.19341117143630981 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.4507609009742737, + "learning_rate": 1.06953881740639e-05, + "loss": 0.2426, + "step": 2466, + "teacher_loss": 0.21950627863407135 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.7430602312088013, + "learning_rate": 1.0699725314442678e-05, + "loss": 0.3182, + "step": 2467, + "teacher_loss": 0.27104824781417847 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.43340665102005005, + "learning_rate": 1.0704062454821455e-05, + "loss": 0.217, + "step": 2468, + "teacher_loss": 0.19291651248931885 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.7246847152709961, + "learning_rate": 1.0708399595200233e-05, + "loss": 0.3134, + "step": 2469, + "teacher_loss": 0.26768508553504944 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.41216230392456055, + "learning_rate": 1.0712736735579009e-05, + "loss": 0.2966, + "step": 2470, + "teacher_loss": 0.28374308347702026 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.28492599725723267, + "learning_rate": 1.0717073875957785e-05, + "loss": 0.156, + "step": 2471, + "teacher_loss": 0.14169326424598694 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.5045020580291748, + "learning_rate": 1.0721411016336562e-05, + "loss": 0.2281, + "step": 2472, + "teacher_loss": 0.19744394719600677 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.18070977926254272, + "learning_rate": 1.072574815671534e-05, + "loss": 0.1358, + "step": 2473, + "teacher_loss": 0.13079451024532318 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.5173914432525635, + "learning_rate": 1.0730085297094116e-05, + "loss": 0.2517, + "step": 2474, + "teacher_loss": 0.22215279936790466 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.3792479634284973, + "learning_rate": 1.0734422437472893e-05, + "loss": 0.3118, + "step": 2475, + "teacher_loss": 0.3043276369571686 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.2878420054912567, + "learning_rate": 1.073875957785167e-05, + "loss": 0.2172, + "step": 2476, + "teacher_loss": 0.2093481570482254 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.9661229848861694, + "learning_rate": 1.0743096718230447e-05, + "loss": 0.3143, + "step": 2477, + "teacher_loss": 0.24186795949935913 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.5042422413825989, + "learning_rate": 1.0747433858609225e-05, + "loss": 0.2528, + "step": 2478, + "teacher_loss": 0.22481946647167206 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.26766276359558105, + "learning_rate": 1.0751770998988e-05, + "loss": 0.2115, + "step": 2479, + "teacher_loss": 0.20520789921283722 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.1566394567489624, + "learning_rate": 1.0756108139366778e-05, + "loss": 0.1784, + "step": 2480, + "teacher_loss": 0.18076878786087036 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.19514954090118408, + "learning_rate": 1.0760445279745554e-05, + "loss": 0.1976, + "step": 2481, + "teacher_loss": 0.19785045087337494 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.6062769889831543, + "learning_rate": 1.0764782420124332e-05, + "loss": 0.2444, + "step": 2482, + "teacher_loss": 0.20415878295898438 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.5608301758766174, + "learning_rate": 1.0769119560503108e-05, + "loss": 0.4011, + "step": 2483, + "teacher_loss": 0.3833540081977844 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.34917008876800537, + "learning_rate": 1.0773456700881885e-05, + "loss": 0.2107, + "step": 2484, + "teacher_loss": 0.1952686607837677 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.4104021191596985, + "learning_rate": 1.0777793841260663e-05, + "loss": 0.305, + "step": 2485, + "teacher_loss": 0.2932960093021393 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.3379254937171936, + "learning_rate": 1.078213098163944e-05, + "loss": 0.2786, + "step": 2486, + "teacher_loss": 0.271992564201355 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.5497634410858154, + "learning_rate": 1.0786468122018216e-05, + "loss": 0.3201, + "step": 2487, + "teacher_loss": 0.2945822477340698 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.2738702893257141, + "learning_rate": 1.0790805262396992e-05, + "loss": 0.1955, + "step": 2488, + "teacher_loss": 0.18674317002296448 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.4531628489494324, + "learning_rate": 1.079514240277577e-05, + "loss": 0.3489, + "step": 2489, + "teacher_loss": 0.33734965324401855 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.22863608598709106, + "learning_rate": 1.0799479543154548e-05, + "loss": 0.1606, + "step": 2490, + "teacher_loss": 0.1529940664768219 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.4543974697589874, + "learning_rate": 1.0803816683533325e-05, + "loss": 0.2489, + "step": 2491, + "teacher_loss": 0.22604787349700928 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.45465630292892456, + "learning_rate": 1.08081538239121e-05, + "loss": 0.2871, + "step": 2492, + "teacher_loss": 0.26848796010017395 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.19923967123031616, + "learning_rate": 1.0812490964290877e-05, + "loss": 0.215, + "step": 2493, + "teacher_loss": 0.21674197912216187 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.6403992176055908, + "learning_rate": 1.0816828104669655e-05, + "loss": 0.2242, + "step": 2494, + "teacher_loss": 0.17793838679790497 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.2759147882461548, + "learning_rate": 1.0821165245048432e-05, + "loss": 0.1929, + "step": 2495, + "teacher_loss": 0.18373069167137146 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.5355908870697021, + "learning_rate": 1.0825502385427208e-05, + "loss": 0.2315, + "step": 2496, + "teacher_loss": 0.19766712188720703 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.38285648822784424, + "learning_rate": 1.0829839525805986e-05, + "loss": 0.2511, + "step": 2497, + "teacher_loss": 0.23642978072166443 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 1.0085573196411133, + "learning_rate": 1.0834176666184762e-05, + "loss": 0.3613, + "step": 2498, + "teacher_loss": 0.2893427610397339 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.2714253067970276, + "learning_rate": 1.083851380656354e-05, + "loss": 0.1885, + "step": 2499, + "teacher_loss": 0.17929911613464355 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.4236676096916199, + "learning_rate": 1.0842850946942317e-05, + "loss": 0.1919, + "step": 2500, + "teacher_loss": 0.16618189215660095 + }, + { + "epoch": 0.45, + "eval_exact_match": 79.96215704824976, + "eval_f1": 87.18487043071096, + "step": 2500 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.3454571068286896, + "learning_rate": 1.0847188087321093e-05, + "loss": 0.1868, + "step": 2501, + "teacher_loss": 0.16914451122283936 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.8027348518371582, + "learning_rate": 1.085152522769987e-05, + "loss": 0.3536, + "step": 2502, + "teacher_loss": 0.3037194013595581 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.28169554471969604, + "learning_rate": 1.0855862368078647e-05, + "loss": 0.1936, + "step": 2503, + "teacher_loss": 0.18384036421775818 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.5043920874595642, + "learning_rate": 1.0860199508457424e-05, + "loss": 0.2612, + "step": 2504, + "teacher_loss": 0.23421043157577515 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.7622601985931396, + "learning_rate": 1.08645366488362e-05, + "loss": 0.2907, + "step": 2505, + "teacher_loss": 0.2383057177066803 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.3897848129272461, + "learning_rate": 1.0868873789214978e-05, + "loss": 0.2278, + "step": 2506, + "teacher_loss": 0.2097935527563095 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.2639317810535431, + "learning_rate": 1.0873210929593755e-05, + "loss": 0.2033, + "step": 2507, + "teacher_loss": 0.19652585685253143 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.21258127689361572, + "learning_rate": 1.0877548069972533e-05, + "loss": 0.1947, + "step": 2508, + "teacher_loss": 0.19266745448112488 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.2877371311187744, + "learning_rate": 1.0881885210351309e-05, + "loss": 0.2368, + "step": 2509, + "teacher_loss": 0.23108959197998047 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.6338034272193909, + "learning_rate": 1.0886222350730085e-05, + "loss": 0.3323, + "step": 2510, + "teacher_loss": 0.2987942099571228 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.1302526593208313, + "learning_rate": 1.0890559491108862e-05, + "loss": 0.1784, + "step": 2511, + "teacher_loss": 0.18375495076179504 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 1.029805064201355, + "learning_rate": 1.089489663148764e-05, + "loss": 0.3266, + "step": 2512, + "teacher_loss": 0.24846431612968445 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.3524097204208374, + "learning_rate": 1.0899233771866418e-05, + "loss": 0.1903, + "step": 2513, + "teacher_loss": 0.17223826050758362 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.485371470451355, + "learning_rate": 1.0903570912245192e-05, + "loss": 0.2336, + "step": 2514, + "teacher_loss": 0.2055875062942505 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.16797083616256714, + "learning_rate": 1.090790805262397e-05, + "loss": 0.1802, + "step": 2515, + "teacher_loss": 0.18154458701610565 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.32138270139694214, + "learning_rate": 1.0912245193002747e-05, + "loss": 0.2608, + "step": 2516, + "teacher_loss": 0.254066526889801 + }, + { + "compression_loss": 0.0, + "epoch": 0.45, + "label_loss": 0.3390832543373108, + "learning_rate": 1.0916582333381525e-05, + "loss": 0.1711, + "step": 2517, + "teacher_loss": 0.15241624414920807 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.6732155084609985, + "learning_rate": 1.09209194737603e-05, + "loss": 0.2799, + "step": 2518, + "teacher_loss": 0.23619751632213593 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.44451266527175903, + "learning_rate": 1.0925256614139078e-05, + "loss": 0.2279, + "step": 2519, + "teacher_loss": 0.20377777516841888 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.18079423904418945, + "learning_rate": 1.0929593754517854e-05, + "loss": 0.2439, + "step": 2520, + "teacher_loss": 0.25092098116874695 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.3356236219406128, + "learning_rate": 1.0933930894896632e-05, + "loss": 0.2946, + "step": 2521, + "teacher_loss": 0.29003196954727173 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.6910123825073242, + "learning_rate": 1.093826803527541e-05, + "loss": 0.4239, + "step": 2522, + "teacher_loss": 0.3942108750343323 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.5024522542953491, + "learning_rate": 1.0942605175654185e-05, + "loss": 0.2779, + "step": 2523, + "teacher_loss": 0.25294816493988037 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.4282335638999939, + "learning_rate": 1.0946942316032963e-05, + "loss": 0.2195, + "step": 2524, + "teacher_loss": 0.19629724323749542 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.3950645327568054, + "learning_rate": 1.0951279456411739e-05, + "loss": 0.1897, + "step": 2525, + "teacher_loss": 0.16693225502967834 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.3842361569404602, + "learning_rate": 1.0955616596790517e-05, + "loss": 0.2112, + "step": 2526, + "teacher_loss": 0.19197949767112732 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.26759836077690125, + "learning_rate": 1.0959953737169293e-05, + "loss": 0.1839, + "step": 2527, + "teacher_loss": 0.17459891736507416 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.3099031448364258, + "learning_rate": 1.096429087754807e-05, + "loss": 0.2601, + "step": 2528, + "teacher_loss": 0.25451797246932983 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.6155750155448914, + "learning_rate": 1.0968628017926848e-05, + "loss": 0.327, + "step": 2529, + "teacher_loss": 0.29488617181777954 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.34697088599205017, + "learning_rate": 1.0972965158305624e-05, + "loss": 0.2829, + "step": 2530, + "teacher_loss": 0.27577322721481323 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.8503072261810303, + "learning_rate": 1.0977302298684401e-05, + "loss": 0.2882, + "step": 2531, + "teacher_loss": 0.22579072415828705 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.6570554971694946, + "learning_rate": 1.0981639439063177e-05, + "loss": 0.3597, + "step": 2532, + "teacher_loss": 0.3266124129295349 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.430215060710907, + "learning_rate": 1.0985976579441955e-05, + "loss": 0.3421, + "step": 2533, + "teacher_loss": 0.3322986364364624 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.7646737098693848, + "learning_rate": 1.0990313719820733e-05, + "loss": 0.2996, + "step": 2534, + "teacher_loss": 0.247940793633461 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.36907637119293213, + "learning_rate": 1.099465086019951e-05, + "loss": 0.2151, + "step": 2535, + "teacher_loss": 0.19801479578018188 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.2139626145362854, + "learning_rate": 1.0998988000578284e-05, + "loss": 0.1942, + "step": 2536, + "teacher_loss": 0.19205938279628754 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.47275546193122864, + "learning_rate": 1.1003325140957062e-05, + "loss": 0.1999, + "step": 2537, + "teacher_loss": 0.16956260800361633 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.43362492322921753, + "learning_rate": 1.100766228133584e-05, + "loss": 0.187, + "step": 2538, + "teacher_loss": 0.1595967561006546 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.5260012149810791, + "learning_rate": 1.1011999421714617e-05, + "loss": 0.2439, + "step": 2539, + "teacher_loss": 0.21258725225925446 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.26398134231567383, + "learning_rate": 1.1016336562093393e-05, + "loss": 0.2086, + "step": 2540, + "teacher_loss": 0.202442467212677 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.32088086009025574, + "learning_rate": 1.1020673702472169e-05, + "loss": 0.1942, + "step": 2541, + "teacher_loss": 0.18017607927322388 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.5509706735610962, + "learning_rate": 1.1025010842850947e-05, + "loss": 0.2594, + "step": 2542, + "teacher_loss": 0.2270534485578537 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.35418152809143066, + "learning_rate": 1.1029347983229724e-05, + "loss": 0.1638, + "step": 2543, + "teacher_loss": 0.14259693026542664 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.43902963399887085, + "learning_rate": 1.1033685123608502e-05, + "loss": 0.2449, + "step": 2544, + "teacher_loss": 0.22338388860225677 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.5425081253051758, + "learning_rate": 1.1038022263987278e-05, + "loss": 0.2706, + "step": 2545, + "teacher_loss": 0.24037402868270874 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.15332895517349243, + "learning_rate": 1.1042359404366056e-05, + "loss": 0.2692, + "step": 2546, + "teacher_loss": 0.28209415078163147 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.1199747622013092, + "learning_rate": 1.1046696544744831e-05, + "loss": 0.1982, + "step": 2547, + "teacher_loss": 0.20692431926727295 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.3821457028388977, + "learning_rate": 1.1051033685123609e-05, + "loss": 0.1952, + "step": 2548, + "teacher_loss": 0.1744241714477539 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.4765229821205139, + "learning_rate": 1.1055370825502385e-05, + "loss": 0.2758, + "step": 2549, + "teacher_loss": 0.25352945923805237 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.47685506939888, + "learning_rate": 1.1059707965881163e-05, + "loss": 0.2363, + "step": 2550, + "teacher_loss": 0.20954486727714539 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.47066396474838257, + "learning_rate": 1.106404510625994e-05, + "loss": 0.2794, + "step": 2551, + "teacher_loss": 0.2581639587879181 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.7032426595687866, + "learning_rate": 1.1068382246638716e-05, + "loss": 0.2748, + "step": 2552, + "teacher_loss": 0.22718043625354767 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.48337581753730774, + "learning_rate": 1.1072719387017494e-05, + "loss": 0.2654, + "step": 2553, + "teacher_loss": 0.24115249514579773 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.21241718530654907, + "learning_rate": 1.107705652739627e-05, + "loss": 0.208, + "step": 2554, + "teacher_loss": 0.20746438205242157 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.5338148474693298, + "learning_rate": 1.1081393667775047e-05, + "loss": 0.305, + "step": 2555, + "teacher_loss": 0.2795740067958832 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.9185878038406372, + "learning_rate": 1.1085730808153825e-05, + "loss": 0.2797, + "step": 2556, + "teacher_loss": 0.20875728130340576 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.34692180156707764, + "learning_rate": 1.1090067948532603e-05, + "loss": 0.2462, + "step": 2557, + "teacher_loss": 0.23505136370658875 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.4488905668258667, + "learning_rate": 1.1094405088911377e-05, + "loss": 0.202, + "step": 2558, + "teacher_loss": 0.1745334267616272 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.46294406056404114, + "learning_rate": 1.1098742229290154e-05, + "loss": 0.2405, + "step": 2559, + "teacher_loss": 0.2158115804195404 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.3270619511604309, + "learning_rate": 1.1103079369668932e-05, + "loss": 0.2787, + "step": 2560, + "teacher_loss": 0.2733554244041443 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.3540976047515869, + "learning_rate": 1.110741651004771e-05, + "loss": 0.2012, + "step": 2561, + "teacher_loss": 0.184186190366745 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.5566182136535645, + "learning_rate": 1.1111753650426486e-05, + "loss": 0.2533, + "step": 2562, + "teacher_loss": 0.21956832706928253 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.3199962079524994, + "learning_rate": 1.1116090790805262e-05, + "loss": 0.2473, + "step": 2563, + "teacher_loss": 0.23919281363487244 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.4552381634712219, + "learning_rate": 1.112042793118404e-05, + "loss": 0.2483, + "step": 2564, + "teacher_loss": 0.22526150941848755 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 1.0423669815063477, + "learning_rate": 1.1124765071562817e-05, + "loss": 0.3261, + "step": 2565, + "teacher_loss": 0.24647323787212372 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.30001139640808105, + "learning_rate": 1.1129102211941594e-05, + "loss": 0.2329, + "step": 2566, + "teacher_loss": 0.22545361518859863 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.4931596517562866, + "learning_rate": 1.113343935232037e-05, + "loss": 0.2552, + "step": 2567, + "teacher_loss": 0.22871464490890503 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.44307535886764526, + "learning_rate": 1.1137776492699148e-05, + "loss": 0.3035, + "step": 2568, + "teacher_loss": 0.2880330979824066 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.34950628876686096, + "learning_rate": 1.1142113633077924e-05, + "loss": 0.2213, + "step": 2569, + "teacher_loss": 0.20708659291267395 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.2891450524330139, + "learning_rate": 1.1146450773456701e-05, + "loss": 0.2305, + "step": 2570, + "teacher_loss": 0.22394594550132751 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.6095059514045715, + "learning_rate": 1.1150787913835477e-05, + "loss": 0.397, + "step": 2571, + "teacher_loss": 0.3733842968940735 + }, + { + "compression_loss": 0.0, + "epoch": 0.46, + "label_loss": 0.7332524061203003, + "learning_rate": 1.1155125054214255e-05, + "loss": 0.3006, + "step": 2572, + "teacher_loss": 0.2524861693382263 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.6859009265899658, + "learning_rate": 1.1159462194593033e-05, + "loss": 0.2499, + "step": 2573, + "teacher_loss": 0.20145851373672485 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.3341946005821228, + "learning_rate": 1.1163799334971809e-05, + "loss": 0.1832, + "step": 2574, + "teacher_loss": 0.16647052764892578 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.44110292196273804, + "learning_rate": 1.1168136475350586e-05, + "loss": 0.264, + "step": 2575, + "teacher_loss": 0.24430416524410248 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.36559349298477173, + "learning_rate": 1.1172473615729362e-05, + "loss": 0.266, + "step": 2576, + "teacher_loss": 0.25488996505737305 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.5404074192047119, + "learning_rate": 1.117681075610814e-05, + "loss": 0.2309, + "step": 2577, + "teacher_loss": 0.19647076725959778 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.24006101489067078, + "learning_rate": 1.1181147896486917e-05, + "loss": 0.214, + "step": 2578, + "teacher_loss": 0.2111242711544037 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.5765140056610107, + "learning_rate": 1.1185485036865693e-05, + "loss": 0.2699, + "step": 2579, + "teacher_loss": 0.2358786016702652 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.36410245299339294, + "learning_rate": 1.118982217724447e-05, + "loss": 0.2084, + "step": 2580, + "teacher_loss": 0.19108179211616516 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.2288976013660431, + "learning_rate": 1.1194159317623247e-05, + "loss": 0.1497, + "step": 2581, + "teacher_loss": 0.1408894807100296 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.1544944941997528, + "learning_rate": 1.1198496458002024e-05, + "loss": 0.2286, + "step": 2582, + "teacher_loss": 0.2368241548538208 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.22837793827056885, + "learning_rate": 1.1202833598380802e-05, + "loss": 0.2089, + "step": 2583, + "teacher_loss": 0.20677350461483002 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.5781692862510681, + "learning_rate": 1.1207170738759578e-05, + "loss": 0.3754, + "step": 2584, + "teacher_loss": 0.35281580686569214 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.45877158641815186, + "learning_rate": 1.1211507879138354e-05, + "loss": 0.2545, + "step": 2585, + "teacher_loss": 0.23182399570941925 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.7806944847106934, + "learning_rate": 1.1215845019517132e-05, + "loss": 0.2556, + "step": 2586, + "teacher_loss": 0.19729870557785034 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.24409300088882446, + "learning_rate": 1.122018215989591e-05, + "loss": 0.2414, + "step": 2587, + "teacher_loss": 0.2411501407623291 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.20185744762420654, + "learning_rate": 1.1224519300274687e-05, + "loss": 0.2223, + "step": 2588, + "teacher_loss": 0.22458413243293762 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.27556174993515015, + "learning_rate": 1.1228856440653463e-05, + "loss": 0.1771, + "step": 2589, + "teacher_loss": 0.16619546711444855 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.4446481466293335, + "learning_rate": 1.1233193581032239e-05, + "loss": 0.3707, + "step": 2590, + "teacher_loss": 0.36245495080947876 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.3170129656791687, + "learning_rate": 1.1237530721411016e-05, + "loss": 0.2897, + "step": 2591, + "teacher_loss": 0.2866743206977844 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.4113917052745819, + "learning_rate": 1.1241867861789794e-05, + "loss": 0.238, + "step": 2592, + "teacher_loss": 0.21871866285800934 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.3163455128669739, + "learning_rate": 1.124620500216857e-05, + "loss": 0.1706, + "step": 2593, + "teacher_loss": 0.15442919731140137 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.27670609951019287, + "learning_rate": 1.1250542142547347e-05, + "loss": 0.1626, + "step": 2594, + "teacher_loss": 0.14989374577999115 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.25696122646331787, + "learning_rate": 1.1254879282926125e-05, + "loss": 0.1715, + "step": 2595, + "teacher_loss": 0.16200119256973267 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.49533218145370483, + "learning_rate": 1.1259216423304901e-05, + "loss": 0.2376, + "step": 2596, + "teacher_loss": 0.2089582234621048 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.17777703702449799, + "learning_rate": 1.1263553563683679e-05, + "loss": 0.2038, + "step": 2597, + "teacher_loss": 0.20671939849853516 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.5646034479141235, + "learning_rate": 1.1267890704062455e-05, + "loss": 0.209, + "step": 2598, + "teacher_loss": 0.16949300467967987 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.735980749130249, + "learning_rate": 1.1272227844441232e-05, + "loss": 0.3186, + "step": 2599, + "teacher_loss": 0.27217650413513184 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.5757689476013184, + "learning_rate": 1.127656498482001e-05, + "loss": 0.3043, + "step": 2600, + "teacher_loss": 0.27409130334854126 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.3667204976081848, + "learning_rate": 1.1280902125198786e-05, + "loss": 0.2139, + "step": 2601, + "teacher_loss": 0.19693458080291748 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.4980495572090149, + "learning_rate": 1.1285239265577562e-05, + "loss": 0.2086, + "step": 2602, + "teacher_loss": 0.17644032835960388 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.6358938217163086, + "learning_rate": 1.128957640595634e-05, + "loss": 0.3808, + "step": 2603, + "teacher_loss": 0.35240083932876587 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.49157604575157166, + "learning_rate": 1.1293913546335117e-05, + "loss": 0.2527, + "step": 2604, + "teacher_loss": 0.22621144354343414 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.20347315073013306, + "learning_rate": 1.1298250686713895e-05, + "loss": 0.1968, + "step": 2605, + "teacher_loss": 0.19608746469020844 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.9442662000656128, + "learning_rate": 1.130258782709267e-05, + "loss": 0.336, + "step": 2606, + "teacher_loss": 0.2683694660663605 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.4317672550678253, + "learning_rate": 1.1306924967471446e-05, + "loss": 0.247, + "step": 2607, + "teacher_loss": 0.22647657990455627 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.5235751867294312, + "learning_rate": 1.1311262107850224e-05, + "loss": 0.3508, + "step": 2608, + "teacher_loss": 0.3316184878349304 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.3959978222846985, + "learning_rate": 1.1315599248229002e-05, + "loss": 0.2556, + "step": 2609, + "teacher_loss": 0.2399551272392273 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.2884896397590637, + "learning_rate": 1.131993638860778e-05, + "loss": 0.1707, + "step": 2610, + "teacher_loss": 0.1576024293899536 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.20990833640098572, + "learning_rate": 1.1324273528986555e-05, + "loss": 0.2038, + "step": 2611, + "teacher_loss": 0.2031690627336502 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.4589312672615051, + "learning_rate": 1.1328610669365331e-05, + "loss": 0.2272, + "step": 2612, + "teacher_loss": 0.20147913694381714 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.5243818759918213, + "learning_rate": 1.1332947809744109e-05, + "loss": 0.2805, + "step": 2613, + "teacher_loss": 0.2533561885356903 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.26924628019332886, + "learning_rate": 1.1337284950122886e-05, + "loss": 0.1736, + "step": 2614, + "teacher_loss": 0.16292704641819 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.6231783628463745, + "learning_rate": 1.1341622090501662e-05, + "loss": 0.2336, + "step": 2615, + "teacher_loss": 0.1902596652507782 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.7980999946594238, + "learning_rate": 1.134595923088044e-05, + "loss": 0.3567, + "step": 2616, + "teacher_loss": 0.3076779842376709 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.4810009002685547, + "learning_rate": 1.1350296371259218e-05, + "loss": 0.3618, + "step": 2617, + "teacher_loss": 0.34852278232574463 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.4672296345233917, + "learning_rate": 1.1354633511637993e-05, + "loss": 0.3082, + "step": 2618, + "teacher_loss": 0.29047930240631104 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.1778792291879654, + "learning_rate": 1.1358970652016771e-05, + "loss": 0.2226, + "step": 2619, + "teacher_loss": 0.22753119468688965 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.5313377380371094, + "learning_rate": 1.1363307792395547e-05, + "loss": 0.296, + "step": 2620, + "teacher_loss": 0.2698304355144501 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.838951826095581, + "learning_rate": 1.1367644932774325e-05, + "loss": 0.3165, + "step": 2621, + "teacher_loss": 0.25843414664268494 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.5401074886322021, + "learning_rate": 1.1371982073153102e-05, + "loss": 0.3942, + "step": 2622, + "teacher_loss": 0.3780317008495331 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.32501643896102905, + "learning_rate": 1.1376319213531878e-05, + "loss": 0.225, + "step": 2623, + "teacher_loss": 0.21384525299072266 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.5073922872543335, + "learning_rate": 1.1380656353910654e-05, + "loss": 0.3492, + "step": 2624, + "teacher_loss": 0.33159786462783813 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.3377583920955658, + "learning_rate": 1.1384993494289432e-05, + "loss": 0.2441, + "step": 2625, + "teacher_loss": 0.23374885320663452 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.26270586252212524, + "learning_rate": 1.138933063466821e-05, + "loss": 0.1704, + "step": 2626, + "teacher_loss": 0.16010406613349915 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.721676230430603, + "learning_rate": 1.1393667775046987e-05, + "loss": 0.2794, + "step": 2627, + "teacher_loss": 0.23020729422569275 + }, + { + "compression_loss": 0.0, + "epoch": 0.47, + "label_loss": 0.1762390434741974, + "learning_rate": 1.1398004915425763e-05, + "loss": 0.1806, + "step": 2628, + "teacher_loss": 0.1810353845357895 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.34777331352233887, + "learning_rate": 1.1402342055804539e-05, + "loss": 0.2001, + "step": 2629, + "teacher_loss": 0.18364128470420837 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.301580548286438, + "learning_rate": 1.1406679196183316e-05, + "loss": 0.1589, + "step": 2630, + "teacher_loss": 0.1430257111787796 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.17474818229675293, + "learning_rate": 1.1411016336562094e-05, + "loss": 0.1951, + "step": 2631, + "teacher_loss": 0.19736558198928833 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.3953016400337219, + "learning_rate": 1.1415353476940872e-05, + "loss": 0.3168, + "step": 2632, + "teacher_loss": 0.3080419898033142 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.39360880851745605, + "learning_rate": 1.1419690617319648e-05, + "loss": 0.2275, + "step": 2633, + "teacher_loss": 0.20904362201690674 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.6607809662818909, + "learning_rate": 1.1424027757698424e-05, + "loss": 0.2853, + "step": 2634, + "teacher_loss": 0.24353361129760742 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.3955228328704834, + "learning_rate": 1.1428364898077201e-05, + "loss": 0.1799, + "step": 2635, + "teacher_loss": 0.15595200657844543 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.23248809576034546, + "learning_rate": 1.1432702038455979e-05, + "loss": 0.1844, + "step": 2636, + "teacher_loss": 0.1790345311164856 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.23448440432548523, + "learning_rate": 1.1437039178834755e-05, + "loss": 0.1965, + "step": 2637, + "teacher_loss": 0.19231267273426056 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.927783727645874, + "learning_rate": 1.1441376319213532e-05, + "loss": 0.2794, + "step": 2638, + "teacher_loss": 0.20730894804000854 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.3491996228694916, + "learning_rate": 1.1445713459592308e-05, + "loss": 0.2736, + "step": 2639, + "teacher_loss": 0.26519787311553955 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.571537435054779, + "learning_rate": 1.1450050599971086e-05, + "loss": 0.3666, + "step": 2640, + "teacher_loss": 0.3438640832901001 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.25649961829185486, + "learning_rate": 1.1454387740349864e-05, + "loss": 0.2054, + "step": 2641, + "teacher_loss": 0.19977012276649475 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.7252275347709656, + "learning_rate": 1.145872488072864e-05, + "loss": 0.3431, + "step": 2642, + "teacher_loss": 0.30058836936950684 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.30321455001831055, + "learning_rate": 1.1463062021107417e-05, + "loss": 0.1684, + "step": 2643, + "teacher_loss": 0.15338864922523499 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.6212984323501587, + "learning_rate": 1.1467399161486195e-05, + "loss": 0.3577, + "step": 2644, + "teacher_loss": 0.328414648771286 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.634611964225769, + "learning_rate": 1.147173630186497e-05, + "loss": 0.2898, + "step": 2645, + "teacher_loss": 0.25150930881500244 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.3808760643005371, + "learning_rate": 1.1476073442243747e-05, + "loss": 0.2287, + "step": 2646, + "teacher_loss": 0.21176062524318695 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.13941128551959991, + "learning_rate": 1.1480410582622524e-05, + "loss": 0.1538, + "step": 2647, + "teacher_loss": 0.1554056853055954 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.5635295510292053, + "learning_rate": 1.1484747723001302e-05, + "loss": 0.2888, + "step": 2648, + "teacher_loss": 0.2582892179489136 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.6621547937393188, + "learning_rate": 1.148908486338008e-05, + "loss": 0.2942, + "step": 2649, + "teacher_loss": 0.2532985210418701 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.36099833250045776, + "learning_rate": 1.1493422003758855e-05, + "loss": 0.2278, + "step": 2650, + "teacher_loss": 0.2129751443862915 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.44811612367630005, + "learning_rate": 1.1497759144137631e-05, + "loss": 0.2468, + "step": 2651, + "teacher_loss": 0.22444146871566772 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.40702179074287415, + "learning_rate": 1.1502096284516409e-05, + "loss": 0.2322, + "step": 2652, + "teacher_loss": 0.21280883252620697 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.9552428722381592, + "learning_rate": 1.1506433424895187e-05, + "loss": 0.3319, + "step": 2653, + "teacher_loss": 0.2626475691795349 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.34105437994003296, + "learning_rate": 1.1510770565273964e-05, + "loss": 0.2581, + "step": 2654, + "teacher_loss": 0.24885720014572144 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.5169386863708496, + "learning_rate": 1.151510770565274e-05, + "loss": 0.2379, + "step": 2655, + "teacher_loss": 0.20691420137882233 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 1.1421351432800293, + "learning_rate": 1.1519444846031516e-05, + "loss": 0.3482, + "step": 2656, + "teacher_loss": 0.26002037525177 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.2524987459182739, + "learning_rate": 1.1523781986410294e-05, + "loss": 0.2361, + "step": 2657, + "teacher_loss": 0.23427344858646393 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.5148970484733582, + "learning_rate": 1.1528119126789071e-05, + "loss": 0.2578, + "step": 2658, + "teacher_loss": 0.22926665842533112 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.3486313819885254, + "learning_rate": 1.1532456267167847e-05, + "loss": 0.1845, + "step": 2659, + "teacher_loss": 0.16624927520751953 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.6609458923339844, + "learning_rate": 1.1536793407546625e-05, + "loss": 0.386, + "step": 2660, + "teacher_loss": 0.35549598932266235 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.1997494399547577, + "learning_rate": 1.15411305479254e-05, + "loss": 0.169, + "step": 2661, + "teacher_loss": 0.16560979187488556 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.40461117029190063, + "learning_rate": 1.1545467688304178e-05, + "loss": 0.2221, + "step": 2662, + "teacher_loss": 0.2018614113330841 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.2932624816894531, + "learning_rate": 1.1549804828682956e-05, + "loss": 0.1852, + "step": 2663, + "teacher_loss": 0.17317649722099304 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.4580557942390442, + "learning_rate": 1.1554141969061732e-05, + "loss": 0.223, + "step": 2664, + "teacher_loss": 0.19686487317085266 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.5219885110855103, + "learning_rate": 1.155847910944051e-05, + "loss": 0.25, + "step": 2665, + "teacher_loss": 0.21975529193878174 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 1.0087976455688477, + "learning_rate": 1.1562816249819287e-05, + "loss": 0.4456, + "step": 2666, + "teacher_loss": 0.3830302953720093 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.589967668056488, + "learning_rate": 1.1567153390198063e-05, + "loss": 0.2979, + "step": 2667, + "teacher_loss": 0.2654666602611542 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.2949758470058441, + "learning_rate": 1.1571490530576839e-05, + "loss": 0.1934, + "step": 2668, + "teacher_loss": 0.1821596920490265 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.3422999978065491, + "learning_rate": 1.1575827670955617e-05, + "loss": 0.214, + "step": 2669, + "teacher_loss": 0.19974404573440552 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.7723867893218994, + "learning_rate": 1.1580164811334394e-05, + "loss": 0.4085, + "step": 2670, + "teacher_loss": 0.36803698539733887 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.3261106610298157, + "learning_rate": 1.1584501951713172e-05, + "loss": 0.2088, + "step": 2671, + "teacher_loss": 0.1957629770040512 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 1.0239346027374268, + "learning_rate": 1.1588839092091948e-05, + "loss": 0.3255, + "step": 2672, + "teacher_loss": 0.24789631366729736 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.8380393981933594, + "learning_rate": 1.1593176232470724e-05, + "loss": 0.7181, + "step": 2673, + "teacher_loss": 0.7047662734985352 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.29857906699180603, + "learning_rate": 1.1597513372849501e-05, + "loss": 0.165, + "step": 2674, + "teacher_loss": 0.15017318725585938 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.23178820312023163, + "learning_rate": 1.1601850513228279e-05, + "loss": 0.1591, + "step": 2675, + "teacher_loss": 0.15098103880882263 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.5880321264266968, + "learning_rate": 1.1606187653607057e-05, + "loss": 0.2887, + "step": 2676, + "teacher_loss": 0.2553853988647461 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.4277018904685974, + "learning_rate": 1.161052479398583e-05, + "loss": 0.1933, + "step": 2677, + "teacher_loss": 0.16729441285133362 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.29130446910858154, + "learning_rate": 1.1614861934364608e-05, + "loss": 0.2135, + "step": 2678, + "teacher_loss": 0.20483243465423584 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.7780278921127319, + "learning_rate": 1.1619199074743386e-05, + "loss": 0.4369, + "step": 2679, + "teacher_loss": 0.3989725708961487 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.38825103640556335, + "learning_rate": 1.1623536215122164e-05, + "loss": 0.1983, + "step": 2680, + "teacher_loss": 0.1772429198026657 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.5059837102890015, + "learning_rate": 1.162787335550094e-05, + "loss": 0.2789, + "step": 2681, + "teacher_loss": 0.2536166310310364 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.7126432657241821, + "learning_rate": 1.1632210495879717e-05, + "loss": 0.236, + "step": 2682, + "teacher_loss": 0.18306750059127808 + }, + { + "compression_loss": 0.0, + "epoch": 0.48, + "label_loss": 0.8429287075996399, + "learning_rate": 1.1636547636258493e-05, + "loss": 0.2516, + "step": 2683, + "teacher_loss": 0.18587175011634827 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.7280197739601135, + "learning_rate": 1.164088477663727e-05, + "loss": 0.331, + "step": 2684, + "teacher_loss": 0.2868611216545105 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.38181179761886597, + "learning_rate": 1.1645221917016048e-05, + "loss": 0.2322, + "step": 2685, + "teacher_loss": 0.21557992696762085 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.32834070920944214, + "learning_rate": 1.1649559057394824e-05, + "loss": 0.2372, + "step": 2686, + "teacher_loss": 0.22710910439491272 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.29126954078674316, + "learning_rate": 1.1653896197773602e-05, + "loss": 0.223, + "step": 2687, + "teacher_loss": 0.21542009711265564 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.15271346271038055, + "learning_rate": 1.1658233338152378e-05, + "loss": 0.2518, + "step": 2688, + "teacher_loss": 0.26280081272125244 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.6026004552841187, + "learning_rate": 1.1662570478531156e-05, + "loss": 0.2289, + "step": 2689, + "teacher_loss": 0.18735647201538086 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.2639501690864563, + "learning_rate": 1.1666907618909931e-05, + "loss": 0.2053, + "step": 2690, + "teacher_loss": 0.19873586297035217 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.6021695137023926, + "learning_rate": 1.1671244759288709e-05, + "loss": 0.3207, + "step": 2691, + "teacher_loss": 0.2894771993160248 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.4192838668823242, + "learning_rate": 1.1675581899667487e-05, + "loss": 0.1879, + "step": 2692, + "teacher_loss": 0.16221432387828827 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.7256275415420532, + "learning_rate": 1.1679919040046264e-05, + "loss": 0.3064, + "step": 2693, + "teacher_loss": 0.2597653269767761 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.33181193470954895, + "learning_rate": 1.168425618042504e-05, + "loss": 0.2421, + "step": 2694, + "teacher_loss": 0.23215770721435547 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.1506984829902649, + "learning_rate": 1.1688593320803816e-05, + "loss": 0.2146, + "step": 2695, + "teacher_loss": 0.22171252965927124 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.5970140695571899, + "learning_rate": 1.1692930461182594e-05, + "loss": 0.336, + "step": 2696, + "teacher_loss": 0.3069695234298706 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.47001200914382935, + "learning_rate": 1.1697267601561371e-05, + "loss": 0.2536, + "step": 2697, + "teacher_loss": 0.2296069860458374 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.23714056611061096, + "learning_rate": 1.1701604741940149e-05, + "loss": 0.2411, + "step": 2698, + "teacher_loss": 0.24155963957309723 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.37077924609184265, + "learning_rate": 1.1705941882318923e-05, + "loss": 0.2114, + "step": 2699, + "teacher_loss": 0.19367915391921997 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.7926560640335083, + "learning_rate": 1.1710279022697701e-05, + "loss": 0.3557, + "step": 2700, + "teacher_loss": 0.3071998059749603 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.49158933758735657, + "learning_rate": 1.1714616163076479e-05, + "loss": 0.3216, + "step": 2701, + "teacher_loss": 0.3027087450027466 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.6287871599197388, + "learning_rate": 1.1718953303455256e-05, + "loss": 0.2506, + "step": 2702, + "teacher_loss": 0.20857484638690948 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.3568631112575531, + "learning_rate": 1.1723290443834032e-05, + "loss": 0.2215, + "step": 2703, + "teacher_loss": 0.206405371427536 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.3900279998779297, + "learning_rate": 1.172762758421281e-05, + "loss": 0.265, + "step": 2704, + "teacher_loss": 0.251122385263443 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.4751322865486145, + "learning_rate": 1.1731964724591586e-05, + "loss": 0.2354, + "step": 2705, + "teacher_loss": 0.20875263214111328 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.2733660936355591, + "learning_rate": 1.1736301864970363e-05, + "loss": 0.1903, + "step": 2706, + "teacher_loss": 0.18105006217956543 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.7769244909286499, + "learning_rate": 1.1740639005349141e-05, + "loss": 0.3563, + "step": 2707, + "teacher_loss": 0.3095134496688843 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.49802684783935547, + "learning_rate": 1.1744976145727917e-05, + "loss": 0.2292, + "step": 2708, + "teacher_loss": 0.19931241869926453 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.43429094552993774, + "learning_rate": 1.1749313286106694e-05, + "loss": 0.264, + "step": 2709, + "teacher_loss": 0.24503526091575623 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.34477633237838745, + "learning_rate": 1.175365042648547e-05, + "loss": 0.2031, + "step": 2710, + "teacher_loss": 0.1873832494020462 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.17174574732780457, + "learning_rate": 1.1757987566864248e-05, + "loss": 0.1983, + "step": 2711, + "teacher_loss": 0.20125645399093628 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.372773140668869, + "learning_rate": 1.1762324707243024e-05, + "loss": 0.2701, + "step": 2712, + "teacher_loss": 0.2586411237716675 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.4282851219177246, + "learning_rate": 1.1766661847621802e-05, + "loss": 0.2253, + "step": 2713, + "teacher_loss": 0.20271286368370056 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.7138041853904724, + "learning_rate": 1.1770998988000579e-05, + "loss": 0.3552, + "step": 2714, + "teacher_loss": 0.31534916162490845 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.8002606630325317, + "learning_rate": 1.1775336128379357e-05, + "loss": 0.3493, + "step": 2715, + "teacher_loss": 0.2991919219493866 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.48987025022506714, + "learning_rate": 1.1779673268758133e-05, + "loss": 0.3607, + "step": 2716, + "teacher_loss": 0.34633442759513855 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.29670393466949463, + "learning_rate": 1.1784010409136909e-05, + "loss": 0.223, + "step": 2717, + "teacher_loss": 0.21485163271427155 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.33022040128707886, + "learning_rate": 1.1788347549515686e-05, + "loss": 0.203, + "step": 2718, + "teacher_loss": 0.18881940841674805 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.6116946935653687, + "learning_rate": 1.1792684689894464e-05, + "loss": 0.2316, + "step": 2719, + "teacher_loss": 0.18934962153434753 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.1104709655046463, + "learning_rate": 1.1797021830273241e-05, + "loss": 0.2309, + "step": 2720, + "teacher_loss": 0.24428671598434448 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.2969475984573364, + "learning_rate": 1.1801358970652016e-05, + "loss": 0.2652, + "step": 2721, + "teacher_loss": 0.2617086172103882 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.19045990705490112, + "learning_rate": 1.1805696111030793e-05, + "loss": 0.1925, + "step": 2722, + "teacher_loss": 0.19274374842643738 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.22433248162269592, + "learning_rate": 1.1810033251409571e-05, + "loss": 0.2379, + "step": 2723, + "teacher_loss": 0.2394241988658905 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.6566988229751587, + "learning_rate": 1.1814370391788349e-05, + "loss": 0.2636, + "step": 2724, + "teacher_loss": 0.21995803713798523 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.7349234819412231, + "learning_rate": 1.1818707532167125e-05, + "loss": 0.3091, + "step": 2725, + "teacher_loss": 0.2618390917778015 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.34041017293930054, + "learning_rate": 1.18230446725459e-05, + "loss": 0.279, + "step": 2726, + "teacher_loss": 0.2722034156322479 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.12734031677246094, + "learning_rate": 1.1827381812924678e-05, + "loss": 0.1456, + "step": 2727, + "teacher_loss": 0.14766442775726318 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.3510662317276001, + "learning_rate": 1.1831718953303456e-05, + "loss": 0.2419, + "step": 2728, + "teacher_loss": 0.2297602742910385 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.41019418835639954, + "learning_rate": 1.1836056093682233e-05, + "loss": 0.2705, + "step": 2729, + "teacher_loss": 0.25496602058410645 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.35969462990760803, + "learning_rate": 1.184039323406101e-05, + "loss": 0.223, + "step": 2730, + "teacher_loss": 0.20785868167877197 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.45235681533813477, + "learning_rate": 1.1844730374439787e-05, + "loss": 0.2942, + "step": 2731, + "teacher_loss": 0.2765870690345764 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.4447060823440552, + "learning_rate": 1.1849067514818563e-05, + "loss": 0.1987, + "step": 2732, + "teacher_loss": 0.17140191793441772 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.6314231157302856, + "learning_rate": 1.185340465519734e-05, + "loss": 0.256, + "step": 2733, + "teacher_loss": 0.21425361931324005 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.44326549768447876, + "learning_rate": 1.1857741795576116e-05, + "loss": 0.2518, + "step": 2734, + "teacher_loss": 0.23052552342414856 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.18477675318717957, + "learning_rate": 1.1862078935954894e-05, + "loss": 0.2282, + "step": 2735, + "teacher_loss": 0.23301461338996887 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.14530345797538757, + "learning_rate": 1.1866416076333672e-05, + "loss": 0.1712, + "step": 2736, + "teacher_loss": 0.17407536506652832 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.4104914665222168, + "learning_rate": 1.1870753216712447e-05, + "loss": 0.2197, + "step": 2737, + "teacher_loss": 0.19850321114063263 + }, + { + "compression_loss": 0.0, + "epoch": 0.49, + "label_loss": 0.3709258437156677, + "learning_rate": 1.1875090357091225e-05, + "loss": 0.1987, + "step": 2738, + "teacher_loss": 0.17951494455337524 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.3927757143974304, + "learning_rate": 1.1879427497470001e-05, + "loss": 0.2122, + "step": 2739, + "teacher_loss": 0.19213169813156128 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.3682246804237366, + "learning_rate": 1.1883764637848779e-05, + "loss": 0.1874, + "step": 2740, + "teacher_loss": 0.16731923818588257 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.4583500027656555, + "learning_rate": 1.1888101778227556e-05, + "loss": 0.2639, + "step": 2741, + "teacher_loss": 0.24230840802192688 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.6776990294456482, + "learning_rate": 1.1892438918606334e-05, + "loss": 0.289, + "step": 2742, + "teacher_loss": 0.24575796723365784 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.6546613574028015, + "learning_rate": 1.1896776058985108e-05, + "loss": 0.2725, + "step": 2743, + "teacher_loss": 0.23004180192947388 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.5748443007469177, + "learning_rate": 1.1901113199363886e-05, + "loss": 0.3352, + "step": 2744, + "teacher_loss": 0.3085193634033203 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.7293906807899475, + "learning_rate": 1.1905450339742663e-05, + "loss": 0.2612, + "step": 2745, + "teacher_loss": 0.2091558575630188 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.5433912873268127, + "learning_rate": 1.1909787480121441e-05, + "loss": 0.2396, + "step": 2746, + "teacher_loss": 0.2058178037405014 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.46515753865242004, + "learning_rate": 1.1914124620500217e-05, + "loss": 0.2542, + "step": 2747, + "teacher_loss": 0.2307864874601364 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.1727769672870636, + "learning_rate": 1.1918461760878993e-05, + "loss": 0.1949, + "step": 2748, + "teacher_loss": 0.19736497104167938 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.392741322517395, + "learning_rate": 1.192279890125777e-05, + "loss": 0.2164, + "step": 2749, + "teacher_loss": 0.19679558277130127 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.5824330449104309, + "learning_rate": 1.1927136041636548e-05, + "loss": 0.3114, + "step": 2750, + "teacher_loss": 0.2812670171260834 + }, + { + "epoch": 0.5, + "eval_exact_match": 79.54588457899716, + "eval_f1": 87.13278090917339, + "step": 2750 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.3725287914276123, + "learning_rate": 1.1931473182015326e-05, + "loss": 0.2371, + "step": 2751, + "teacher_loss": 0.22203272581100464 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.26836833357810974, + "learning_rate": 1.1935810322394102e-05, + "loss": 0.2585, + "step": 2752, + "teacher_loss": 0.25743967294692993 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.24945861101150513, + "learning_rate": 1.194014746277288e-05, + "loss": 0.1526, + "step": 2753, + "teacher_loss": 0.14184768497943878 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.5122396945953369, + "learning_rate": 1.1944484603151655e-05, + "loss": 0.3449, + "step": 2754, + "teacher_loss": 0.3262713551521301 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.9176746606826782, + "learning_rate": 1.1948821743530433e-05, + "loss": 0.3953, + "step": 2755, + "teacher_loss": 0.33729344606399536 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.34939056634902954, + "learning_rate": 1.1953158883909209e-05, + "loss": 0.2283, + "step": 2756, + "teacher_loss": 0.21482336521148682 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.39029139280319214, + "learning_rate": 1.1957496024287986e-05, + "loss": 0.2044, + "step": 2757, + "teacher_loss": 0.18375341594219208 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.30850085616111755, + "learning_rate": 1.1961833164666764e-05, + "loss": 0.1769, + "step": 2758, + "teacher_loss": 0.16227969527244568 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.3603702187538147, + "learning_rate": 1.196617030504554e-05, + "loss": 0.2305, + "step": 2759, + "teacher_loss": 0.21602925658226013 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.5798477530479431, + "learning_rate": 1.1970507445424318e-05, + "loss": 0.2892, + "step": 2760, + "teacher_loss": 0.25690627098083496 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.6092903017997742, + "learning_rate": 1.1974844585803093e-05, + "loss": 0.2672, + "step": 2761, + "teacher_loss": 0.22922618687152863 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.4217924177646637, + "learning_rate": 1.1979181726181871e-05, + "loss": 0.3656, + "step": 2762, + "teacher_loss": 0.3594071567058563 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.4085831046104431, + "learning_rate": 1.1983518866560649e-05, + "loss": 0.2294, + "step": 2763, + "teacher_loss": 0.20952939987182617 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.5528672933578491, + "learning_rate": 1.1987856006939426e-05, + "loss": 0.3168, + "step": 2764, + "teacher_loss": 0.29051852226257324 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.5818487405776978, + "learning_rate": 1.19921931473182e-05, + "loss": 0.258, + "step": 2765, + "teacher_loss": 0.22200913727283478 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.7699317932128906, + "learning_rate": 1.1996530287696978e-05, + "loss": 0.3074, + "step": 2766, + "teacher_loss": 0.2559676468372345 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.7366092801094055, + "learning_rate": 1.2000867428075756e-05, + "loss": 0.2849, + "step": 2767, + "teacher_loss": 0.2347569465637207 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.8108692765235901, + "learning_rate": 1.2005204568454533e-05, + "loss": 0.3502, + "step": 2768, + "teacher_loss": 0.29904770851135254 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.627205491065979, + "learning_rate": 1.2009541708833311e-05, + "loss": 0.3901, + "step": 2769, + "teacher_loss": 0.3637816607952118 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.4472658932209015, + "learning_rate": 1.2013878849212085e-05, + "loss": 0.4401, + "step": 2770, + "teacher_loss": 0.4392518401145935 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.31642910838127136, + "learning_rate": 1.2018215989590863e-05, + "loss": 0.221, + "step": 2771, + "teacher_loss": 0.21039330959320068 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.39622044563293457, + "learning_rate": 1.202255312996964e-05, + "loss": 0.2684, + "step": 2772, + "teacher_loss": 0.25414392352104187 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.5222471952438354, + "learning_rate": 1.2026890270348418e-05, + "loss": 0.2168, + "step": 2773, + "teacher_loss": 0.18284811079502106 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.3399708867073059, + "learning_rate": 1.2031227410727194e-05, + "loss": 0.2381, + "step": 2774, + "teacher_loss": 0.226749449968338 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.3524761497974396, + "learning_rate": 1.203556455110597e-05, + "loss": 0.2597, + "step": 2775, + "teacher_loss": 0.24944472312927246 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.175907164812088, + "learning_rate": 1.2039901691484748e-05, + "loss": 0.167, + "step": 2776, + "teacher_loss": 0.1660478711128235 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.5861552357673645, + "learning_rate": 1.2044238831863525e-05, + "loss": 0.2065, + "step": 2777, + "teacher_loss": 0.16427116096019745 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.3690280318260193, + "learning_rate": 1.2048575972242301e-05, + "loss": 0.2138, + "step": 2778, + "teacher_loss": 0.19651058316230774 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.18774858117103577, + "learning_rate": 1.2052913112621079e-05, + "loss": 0.1682, + "step": 2779, + "teacher_loss": 0.16606751084327698 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.3397512435913086, + "learning_rate": 1.2057250252999856e-05, + "loss": 0.2046, + "step": 2780, + "teacher_loss": 0.18961921334266663 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.2792900502681732, + "learning_rate": 1.2061587393378632e-05, + "loss": 0.1867, + "step": 2781, + "teacher_loss": 0.17643892765045166 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.42095446586608887, + "learning_rate": 1.206592453375741e-05, + "loss": 0.1754, + "step": 2782, + "teacher_loss": 0.14810076355934143 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.6333006620407104, + "learning_rate": 1.2070261674136186e-05, + "loss": 0.2506, + "step": 2783, + "teacher_loss": 0.2080870270729065 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.3065766394138336, + "learning_rate": 1.2074598814514964e-05, + "loss": 0.2101, + "step": 2784, + "teacher_loss": 0.19934040307998657 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.4391058385372162, + "learning_rate": 1.2078935954893741e-05, + "loss": 0.2594, + "step": 2785, + "teacher_loss": 0.23942387104034424 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.2367694079875946, + "learning_rate": 1.2083273095272517e-05, + "loss": 0.2004, + "step": 2786, + "teacher_loss": 0.19637976586818695 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.21591030061244965, + "learning_rate": 1.2087610235651293e-05, + "loss": 0.2412, + "step": 2787, + "teacher_loss": 0.24399667978286743 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.2285538911819458, + "learning_rate": 1.209194737603007e-05, + "loss": 0.1792, + "step": 2788, + "teacher_loss": 0.17375943064689636 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.5220904350280762, + "learning_rate": 1.2096284516408848e-05, + "loss": 0.3024, + "step": 2789, + "teacher_loss": 0.27803170680999756 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.14029623568058014, + "learning_rate": 1.2100621656787626e-05, + "loss": 0.1604, + "step": 2790, + "teacher_loss": 0.16259488463401794 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.4635617136955261, + "learning_rate": 1.2104958797166404e-05, + "loss": 0.2824, + "step": 2791, + "teacher_loss": 0.2622499167919159 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.4710789620876312, + "learning_rate": 1.2109295937545178e-05, + "loss": 0.2666, + "step": 2792, + "teacher_loss": 0.2439209222793579 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.7224783301353455, + "learning_rate": 1.2113633077923955e-05, + "loss": 0.3356, + "step": 2793, + "teacher_loss": 0.29260414838790894 + }, + { + "compression_loss": 0.0, + "epoch": 0.5, + "label_loss": 0.553899884223938, + "learning_rate": 1.2117970218302733e-05, + "loss": 0.3377, + "step": 2794, + "teacher_loss": 0.31364181637763977 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.4261709153652191, + "learning_rate": 1.212230735868151e-05, + "loss": 0.2447, + "step": 2795, + "teacher_loss": 0.2245543897151947 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.8256220817565918, + "learning_rate": 1.2126644499060287e-05, + "loss": 0.4943, + "step": 2796, + "teacher_loss": 0.45752960443496704 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.599109411239624, + "learning_rate": 1.2130981639439062e-05, + "loss": 0.2988, + "step": 2797, + "teacher_loss": 0.2654431164264679 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.4407704174518585, + "learning_rate": 1.213531877981784e-05, + "loss": 0.2365, + "step": 2798, + "teacher_loss": 0.21375274658203125 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.356095552444458, + "learning_rate": 1.2139655920196618e-05, + "loss": 0.2606, + "step": 2799, + "teacher_loss": 0.24996507167816162 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.4790748357772827, + "learning_rate": 1.2143993060575394e-05, + "loss": 0.2325, + "step": 2800, + "teacher_loss": 0.2051496058702469 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.23424725234508514, + "learning_rate": 1.2148330200954171e-05, + "loss": 0.1821, + "step": 2801, + "teacher_loss": 0.17631107568740845 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.25061652064323425, + "learning_rate": 1.2152667341332949e-05, + "loss": 0.1778, + "step": 2802, + "teacher_loss": 0.16966739296913147 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.8287709951400757, + "learning_rate": 1.2157004481711725e-05, + "loss": 0.3683, + "step": 2803, + "teacher_loss": 0.3171241283416748 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.10736843198537827, + "learning_rate": 1.2161341622090502e-05, + "loss": 0.1957, + "step": 2804, + "teacher_loss": 0.20553088188171387 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.2934303879737854, + "learning_rate": 1.2165678762469278e-05, + "loss": 0.2333, + "step": 2805, + "teacher_loss": 0.22658444941043854 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.4178912043571472, + "learning_rate": 1.2170015902848056e-05, + "loss": 0.2739, + "step": 2806, + "teacher_loss": 0.257956326007843 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.3434050679206848, + "learning_rate": 1.2174353043226834e-05, + "loss": 0.2075, + "step": 2807, + "teacher_loss": 0.19240860641002655 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.6782447695732117, + "learning_rate": 1.217869018360561e-05, + "loss": 0.2411, + "step": 2808, + "teacher_loss": 0.19255954027175903 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.3788807988166809, + "learning_rate": 1.2183027323984385e-05, + "loss": 0.306, + "step": 2809, + "teacher_loss": 0.2979472279548645 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.7416969537734985, + "learning_rate": 1.2187364464363163e-05, + "loss": 0.2785, + "step": 2810, + "teacher_loss": 0.22706541419029236 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.4763883948326111, + "learning_rate": 1.219170160474194e-05, + "loss": 0.2749, + "step": 2811, + "teacher_loss": 0.2525593638420105 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.08716742694377899, + "learning_rate": 1.2196038745120718e-05, + "loss": 0.1903, + "step": 2812, + "teacher_loss": 0.20171219110488892 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.2611193060874939, + "learning_rate": 1.2200375885499496e-05, + "loss": 0.2601, + "step": 2813, + "teacher_loss": 0.25993168354034424 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.5312067866325378, + "learning_rate": 1.220471302587827e-05, + "loss": 0.1794, + "step": 2814, + "teacher_loss": 0.1403425931930542 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.24882356822490692, + "learning_rate": 1.2209050166257048e-05, + "loss": 0.1695, + "step": 2815, + "teacher_loss": 0.16065910458564758 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.6479905843734741, + "learning_rate": 1.2213387306635825e-05, + "loss": 0.238, + "step": 2816, + "teacher_loss": 0.1924542933702469 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.3875870108604431, + "learning_rate": 1.2217724447014603e-05, + "loss": 0.1816, + "step": 2817, + "teacher_loss": 0.1587332785129547 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.7681722640991211, + "learning_rate": 1.2222061587393379e-05, + "loss": 0.3172, + "step": 2818, + "teacher_loss": 0.2670750916004181 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.2610691487789154, + "learning_rate": 1.2226398727772155e-05, + "loss": 0.2649, + "step": 2819, + "teacher_loss": 0.2653735876083374 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.7264211773872375, + "learning_rate": 1.2230735868150933e-05, + "loss": 0.2708, + "step": 2820, + "teacher_loss": 0.2201305776834488 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.8096187114715576, + "learning_rate": 1.223507300852971e-05, + "loss": 0.3187, + "step": 2821, + "teacher_loss": 0.2640998363494873 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.7291936874389648, + "learning_rate": 1.2239410148908486e-05, + "loss": 0.2545, + "step": 2822, + "teacher_loss": 0.20177412033081055 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.6875884532928467, + "learning_rate": 1.2243747289287264e-05, + "loss": 0.3436, + "step": 2823, + "teacher_loss": 0.30543211102485657 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.24918492138385773, + "learning_rate": 1.224808442966604e-05, + "loss": 0.173, + "step": 2824, + "teacher_loss": 0.16451023519039154 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.33140647411346436, + "learning_rate": 1.2252421570044817e-05, + "loss": 0.2441, + "step": 2825, + "teacher_loss": 0.23441873490810394 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.6007271409034729, + "learning_rate": 1.2256758710423595e-05, + "loss": 0.2107, + "step": 2826, + "teacher_loss": 0.167380690574646 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.3942256569862366, + "learning_rate": 1.226109585080237e-05, + "loss": 0.2235, + "step": 2827, + "teacher_loss": 0.20448797941207886 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.5017439126968384, + "learning_rate": 1.2265432991181148e-05, + "loss": 0.191, + "step": 2828, + "teacher_loss": 0.15643101930618286 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.4072628319263458, + "learning_rate": 1.2269770131559926e-05, + "loss": 0.2955, + "step": 2829, + "teacher_loss": 0.2830515503883362 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.372791588306427, + "learning_rate": 1.2274107271938702e-05, + "loss": 0.3129, + "step": 2830, + "teacher_loss": 0.30627113580703735 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.32183337211608887, + "learning_rate": 1.2278444412317478e-05, + "loss": 0.2056, + "step": 2831, + "teacher_loss": 0.19263172149658203 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.26824823021888733, + "learning_rate": 1.2282781552696256e-05, + "loss": 0.2484, + "step": 2832, + "teacher_loss": 0.24618060886859894 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.9093698263168335, + "learning_rate": 1.2287118693075033e-05, + "loss": 0.4284, + "step": 2833, + "teacher_loss": 0.3749205470085144 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.32723698019981384, + "learning_rate": 1.229145583345381e-05, + "loss": 0.2176, + "step": 2834, + "teacher_loss": 0.20544511079788208 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.3240583837032318, + "learning_rate": 1.2295792973832587e-05, + "loss": 0.1969, + "step": 2835, + "teacher_loss": 0.18272244930267334 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.47969186305999756, + "learning_rate": 1.2300130114211363e-05, + "loss": 0.228, + "step": 2836, + "teacher_loss": 0.20006148517131805 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.655357301235199, + "learning_rate": 1.230446725459014e-05, + "loss": 0.2366, + "step": 2837, + "teacher_loss": 0.19008678197860718 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.4674513339996338, + "learning_rate": 1.2308804394968918e-05, + "loss": 0.1721, + "step": 2838, + "teacher_loss": 0.13925771415233612 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.7312817573547363, + "learning_rate": 1.2313141535347695e-05, + "loss": 0.3999, + "step": 2839, + "teacher_loss": 0.3630879819393158 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.3362359404563904, + "learning_rate": 1.2317478675726471e-05, + "loss": 0.2156, + "step": 2840, + "teacher_loss": 0.20220975577831268 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.2592926025390625, + "learning_rate": 1.2321815816105247e-05, + "loss": 0.2117, + "step": 2841, + "teacher_loss": 0.2064572423696518 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.28975388407707214, + "learning_rate": 1.2326152956484025e-05, + "loss": 0.2361, + "step": 2842, + "teacher_loss": 0.2301521897315979 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.21224454045295715, + "learning_rate": 1.2330490096862803e-05, + "loss": 0.1825, + "step": 2843, + "teacher_loss": 0.1792450249195099 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.33605241775512695, + "learning_rate": 1.2334827237241579e-05, + "loss": 0.2524, + "step": 2844, + "teacher_loss": 0.24305102229118347 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.4312984347343445, + "learning_rate": 1.2339164377620356e-05, + "loss": 0.3095, + "step": 2845, + "teacher_loss": 0.2959633469581604 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.2551267445087433, + "learning_rate": 1.2343501517999132e-05, + "loss": 0.3258, + "step": 2846, + "teacher_loss": 0.3336018919944763 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.5234556794166565, + "learning_rate": 1.234783865837791e-05, + "loss": 0.3254, + "step": 2847, + "teacher_loss": 0.3033400774002075 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.45214471220970154, + "learning_rate": 1.2352175798756687e-05, + "loss": 0.2217, + "step": 2848, + "teacher_loss": 0.19613017141819 + }, + { + "compression_loss": 0.0, + "epoch": 0.51, + "label_loss": 0.7057472467422485, + "learning_rate": 1.2356512939135463e-05, + "loss": 0.493, + "step": 2849, + "teacher_loss": 0.46934348344802856 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.4267593026161194, + "learning_rate": 1.2360850079514241e-05, + "loss": 0.2242, + "step": 2850, + "teacher_loss": 0.20168596506118774 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.33162248134613037, + "learning_rate": 1.2365187219893018e-05, + "loss": 0.2405, + "step": 2851, + "teacher_loss": 0.2303842306137085 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.5279541015625, + "learning_rate": 1.2369524360271794e-05, + "loss": 0.2799, + "step": 2852, + "teacher_loss": 0.25236034393310547 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.6938744783401489, + "learning_rate": 1.237386150065057e-05, + "loss": 0.3351, + "step": 2853, + "teacher_loss": 0.2952754497528076 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.3975841999053955, + "learning_rate": 1.2378198641029348e-05, + "loss": 0.1912, + "step": 2854, + "teacher_loss": 0.16821298003196716 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.6715163588523865, + "learning_rate": 1.2382535781408126e-05, + "loss": 0.379, + "step": 2855, + "teacher_loss": 0.34648776054382324 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.446830153465271, + "learning_rate": 1.2386872921786903e-05, + "loss": 0.3289, + "step": 2856, + "teacher_loss": 0.3158435821533203 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.28678110241889954, + "learning_rate": 1.2391210062165679e-05, + "loss": 0.1815, + "step": 2857, + "teacher_loss": 0.16984233260154724 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.5985437035560608, + "learning_rate": 1.2395547202544455e-05, + "loss": 0.2799, + "step": 2858, + "teacher_loss": 0.244448721408844 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.39948201179504395, + "learning_rate": 1.2399884342923233e-05, + "loss": 0.2913, + "step": 2859, + "teacher_loss": 0.27922794222831726 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.5931471586227417, + "learning_rate": 1.240422148330201e-05, + "loss": 0.3072, + "step": 2860, + "teacher_loss": 0.27547332644462585 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.3496328294277191, + "learning_rate": 1.2408558623680788e-05, + "loss": 0.2097, + "step": 2861, + "teacher_loss": 0.19419899582862854 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.5771419405937195, + "learning_rate": 1.2412895764059564e-05, + "loss": 0.2271, + "step": 2862, + "teacher_loss": 0.188198983669281 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.42289865016937256, + "learning_rate": 1.241723290443834e-05, + "loss": 0.1963, + "step": 2863, + "teacher_loss": 0.17114368081092834 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.5555446147918701, + "learning_rate": 1.2421570044817117e-05, + "loss": 0.2872, + "step": 2864, + "teacher_loss": 0.2573981285095215 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.411462664604187, + "learning_rate": 1.2425907185195895e-05, + "loss": 0.2659, + "step": 2865, + "teacher_loss": 0.24976889789104462 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.21053755283355713, + "learning_rate": 1.2430244325574671e-05, + "loss": 0.1975, + "step": 2866, + "teacher_loss": 0.19603992998600006 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.5680697560310364, + "learning_rate": 1.2434581465953449e-05, + "loss": 0.2807, + "step": 2867, + "teacher_loss": 0.24880953133106232 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.2706186771392822, + "learning_rate": 1.2438918606332225e-05, + "loss": 0.2031, + "step": 2868, + "teacher_loss": 0.19558212161064148 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.4105105996131897, + "learning_rate": 1.2443255746711002e-05, + "loss": 0.1986, + "step": 2869, + "teacher_loss": 0.17500299215316772 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.398613840341568, + "learning_rate": 1.244759288708978e-05, + "loss": 0.2584, + "step": 2870, + "teacher_loss": 0.24283260107040405 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.5273966193199158, + "learning_rate": 1.2451930027468556e-05, + "loss": 0.2526, + "step": 2871, + "teacher_loss": 0.22201509773731232 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.6459747552871704, + "learning_rate": 1.2456267167847333e-05, + "loss": 0.2373, + "step": 2872, + "teacher_loss": 0.19186076521873474 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.31774747371673584, + "learning_rate": 1.246060430822611e-05, + "loss": 0.2708, + "step": 2873, + "teacher_loss": 0.2656227648258209 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.2946651577949524, + "learning_rate": 1.2464941448604887e-05, + "loss": 0.2578, + "step": 2874, + "teacher_loss": 0.253707617521286 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 1.1280405521392822, + "learning_rate": 1.2469278588983663e-05, + "loss": 0.3383, + "step": 2875, + "teacher_loss": 0.25050485134124756 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.286027193069458, + "learning_rate": 1.247361572936244e-05, + "loss": 0.1733, + "step": 2876, + "teacher_loss": 0.1608046293258667 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.2591002583503723, + "learning_rate": 1.2477952869741218e-05, + "loss": 0.2106, + "step": 2877, + "teacher_loss": 0.2051936835050583 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.49066299200057983, + "learning_rate": 1.2482290010119996e-05, + "loss": 0.2394, + "step": 2878, + "teacher_loss": 0.21149258315563202 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.8377897143363953, + "learning_rate": 1.2486627150498772e-05, + "loss": 0.2933, + "step": 2879, + "teacher_loss": 0.2328384518623352 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.2672974765300751, + "learning_rate": 1.2490964290877548e-05, + "loss": 0.2285, + "step": 2880, + "teacher_loss": 0.2241635024547577 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.4621201753616333, + "learning_rate": 1.2495301431256325e-05, + "loss": 0.41, + "step": 2881, + "teacher_loss": 0.40423041582107544 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.4419918656349182, + "learning_rate": 1.2499638571635103e-05, + "loss": 0.2599, + "step": 2882, + "teacher_loss": 0.23968642950057983 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.36202648282051086, + "learning_rate": 1.250397571201388e-05, + "loss": 0.3001, + "step": 2883, + "teacher_loss": 0.2932130694389343 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.33822235465049744, + "learning_rate": 1.2508312852392655e-05, + "loss": 0.2168, + "step": 2884, + "teacher_loss": 0.20332825183868408 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.8938894271850586, + "learning_rate": 1.2512649992771432e-05, + "loss": 0.3272, + "step": 2885, + "teacher_loss": 0.2641940116882324 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.2768236994743347, + "learning_rate": 1.251698713315021e-05, + "loss": 0.2129, + "step": 2886, + "teacher_loss": 0.20578469336032867 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.22182798385620117, + "learning_rate": 1.2521324273528987e-05, + "loss": 0.1873, + "step": 2887, + "teacher_loss": 0.18349069356918335 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.34089893102645874, + "learning_rate": 1.2525661413907763e-05, + "loss": 0.1754, + "step": 2888, + "teacher_loss": 0.15703445672988892 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.6644532680511475, + "learning_rate": 1.2529998554286541e-05, + "loss": 0.2993, + "step": 2889, + "teacher_loss": 0.25872868299484253 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.44714200496673584, + "learning_rate": 1.2534335694665317e-05, + "loss": 0.2213, + "step": 2890, + "teacher_loss": 0.1962614804506302 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.2098473459482193, + "learning_rate": 1.2538672835044095e-05, + "loss": 0.1655, + "step": 2891, + "teacher_loss": 0.1605478674173355 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.34414029121398926, + "learning_rate": 1.2543009975422872e-05, + "loss": 0.2457, + "step": 2892, + "teacher_loss": 0.23479852080345154 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.5456206798553467, + "learning_rate": 1.2547347115801648e-05, + "loss": 0.2487, + "step": 2893, + "teacher_loss": 0.21566279232501984 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.15823210775852203, + "learning_rate": 1.2551684256180426e-05, + "loss": 0.1617, + "step": 2894, + "teacher_loss": 0.16212186217308044 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.2455466091632843, + "learning_rate": 1.2556021396559202e-05, + "loss": 0.1566, + "step": 2895, + "teacher_loss": 0.14672674238681793 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.23622816801071167, + "learning_rate": 1.256035853693798e-05, + "loss": 0.1727, + "step": 2896, + "teacher_loss": 0.1656712293624878 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.24362681806087494, + "learning_rate": 1.2564695677316755e-05, + "loss": 0.2157, + "step": 2897, + "teacher_loss": 0.2125878930091858 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.502642810344696, + "learning_rate": 1.2569032817695533e-05, + "loss": 0.2353, + "step": 2898, + "teacher_loss": 0.20561864972114563 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.28127628564834595, + "learning_rate": 1.257336995807431e-05, + "loss": 0.2254, + "step": 2899, + "teacher_loss": 0.21923725306987762 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.43687888979911804, + "learning_rate": 1.2577707098453088e-05, + "loss": 0.2391, + "step": 2900, + "teacher_loss": 0.21712376177310944 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.2428971230983734, + "learning_rate": 1.2582044238831864e-05, + "loss": 0.1937, + "step": 2901, + "teacher_loss": 0.1882447749376297 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.5596681833267212, + "learning_rate": 1.258638137921064e-05, + "loss": 0.2584, + "step": 2902, + "teacher_loss": 0.22494874894618988 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.31732532382011414, + "learning_rate": 1.2590718519589418e-05, + "loss": 0.2766, + "step": 2903, + "teacher_loss": 0.2720375657081604 + }, + { + "compression_loss": 0.0, + "epoch": 0.52, + "label_loss": 0.9225265979766846, + "learning_rate": 1.2595055659968195e-05, + "loss": 0.2666, + "step": 2904, + "teacher_loss": 0.19372084736824036 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.22705870866775513, + "learning_rate": 1.2599392800346973e-05, + "loss": 0.1652, + "step": 2905, + "teacher_loss": 0.15828801691532135 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.5391129851341248, + "learning_rate": 1.2603729940725747e-05, + "loss": 0.2154, + "step": 2906, + "teacher_loss": 0.1794031858444214 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.9196866750717163, + "learning_rate": 1.2608067081104525e-05, + "loss": 1.0142, + "step": 2907, + "teacher_loss": 1.0247061252593994 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.3779296875, + "learning_rate": 1.2612404221483302e-05, + "loss": 0.2771, + "step": 2908, + "teacher_loss": 0.2658865451812744 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.8352428078651428, + "learning_rate": 1.261674136186208e-05, + "loss": 0.3728, + "step": 2909, + "teacher_loss": 0.3214607834815979 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.4498641788959503, + "learning_rate": 1.2621078502240856e-05, + "loss": 0.1908, + "step": 2910, + "teacher_loss": 0.1619841307401657 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.13946379721164703, + "learning_rate": 1.2625415642619633e-05, + "loss": 0.2458, + "step": 2911, + "teacher_loss": 0.25761860609054565 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.2651360332965851, + "learning_rate": 1.262975278299841e-05, + "loss": 0.1792, + "step": 2912, + "teacher_loss": 0.16965562105178833 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.3686763048171997, + "learning_rate": 1.2634089923377187e-05, + "loss": 0.2712, + "step": 2913, + "teacher_loss": 0.2603206932544708 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.3693809509277344, + "learning_rate": 1.2638427063755965e-05, + "loss": 0.254, + "step": 2914, + "teacher_loss": 0.24115371704101562 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.487461119890213, + "learning_rate": 1.264276420413474e-05, + "loss": 0.2517, + "step": 2915, + "teacher_loss": 0.22555246949195862 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.16619402170181274, + "learning_rate": 1.2647101344513518e-05, + "loss": 0.2858, + "step": 2916, + "teacher_loss": 0.299077570438385 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.26363813877105713, + "learning_rate": 1.2651438484892294e-05, + "loss": 0.2137, + "step": 2917, + "teacher_loss": 0.20814919471740723 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.7735315561294556, + "learning_rate": 1.2655775625271072e-05, + "loss": 0.4215, + "step": 2918, + "teacher_loss": 0.38242459297180176 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.68896484375, + "learning_rate": 1.2660112765649848e-05, + "loss": 0.2859, + "step": 2919, + "teacher_loss": 0.24111396074295044 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.31177210807800293, + "learning_rate": 1.2664449906028625e-05, + "loss": 0.3807, + "step": 2920, + "teacher_loss": 0.38830476999282837 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.957051157951355, + "learning_rate": 1.2668787046407403e-05, + "loss": 0.3113, + "step": 2921, + "teacher_loss": 0.23959200084209442 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.7861664891242981, + "learning_rate": 1.267312418678618e-05, + "loss": 0.3176, + "step": 2922, + "teacher_loss": 0.26552748680114746 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.37029603123664856, + "learning_rate": 1.2677461327164956e-05, + "loss": 0.2775, + "step": 2923, + "teacher_loss": 0.26715022325515747 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.7816681265830994, + "learning_rate": 1.2681798467543732e-05, + "loss": 0.2699, + "step": 2924, + "teacher_loss": 0.21298792958259583 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.4529823660850525, + "learning_rate": 1.268613560792251e-05, + "loss": 0.2383, + "step": 2925, + "teacher_loss": 0.2144116908311844 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.3037337064743042, + "learning_rate": 1.2690472748301288e-05, + "loss": 0.1906, + "step": 2926, + "teacher_loss": 0.17799633741378784 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.5503349304199219, + "learning_rate": 1.2694809888680065e-05, + "loss": 0.3369, + "step": 2927, + "teacher_loss": 0.3131994903087616 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.45318156480789185, + "learning_rate": 1.269914702905884e-05, + "loss": 0.2991, + "step": 2928, + "teacher_loss": 0.2819896340370178 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.6324386596679688, + "learning_rate": 1.2703484169437617e-05, + "loss": 0.2648, + "step": 2929, + "teacher_loss": 0.22397229075431824 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.4378213882446289, + "learning_rate": 1.2707821309816395e-05, + "loss": 0.265, + "step": 2930, + "teacher_loss": 0.24574480950832367 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.421183317899704, + "learning_rate": 1.2712158450195172e-05, + "loss": 0.2235, + "step": 2931, + "teacher_loss": 0.20153206586837769 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.3889850974082947, + "learning_rate": 1.271649559057395e-05, + "loss": 0.2386, + "step": 2932, + "teacher_loss": 0.22189223766326904 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.19437116384506226, + "learning_rate": 1.2720832730952724e-05, + "loss": 0.2291, + "step": 2933, + "teacher_loss": 0.23290346562862396 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.24955680966377258, + "learning_rate": 1.2725169871331502e-05, + "loss": 0.1752, + "step": 2934, + "teacher_loss": 0.16697286069393158 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.677643895149231, + "learning_rate": 1.272950701171028e-05, + "loss": 0.252, + "step": 2935, + "teacher_loss": 0.2047576904296875 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.4003026485443115, + "learning_rate": 1.2733844152089057e-05, + "loss": 0.3736, + "step": 2936, + "teacher_loss": 0.3706578016281128 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.3286217153072357, + "learning_rate": 1.2738181292467833e-05, + "loss": 0.2492, + "step": 2937, + "teacher_loss": 0.24041131138801575 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.3038771450519562, + "learning_rate": 1.274251843284661e-05, + "loss": 0.2376, + "step": 2938, + "teacher_loss": 0.2302335798740387 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.20332199335098267, + "learning_rate": 1.2746855573225387e-05, + "loss": 0.2685, + "step": 2939, + "teacher_loss": 0.27578121423721313 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.8955676555633545, + "learning_rate": 1.2751192713604164e-05, + "loss": 0.2943, + "step": 2940, + "teacher_loss": 0.22750616073608398 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.4472431540489197, + "learning_rate": 1.275552985398294e-05, + "loss": 0.2416, + "step": 2941, + "teacher_loss": 0.21879026293754578 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.23451286554336548, + "learning_rate": 1.2759866994361718e-05, + "loss": 0.2458, + "step": 2942, + "teacher_loss": 0.24701380729675293 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.17961151897907257, + "learning_rate": 1.2764204134740495e-05, + "loss": 0.1674, + "step": 2943, + "teacher_loss": 0.16604915261268616 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.2604311406612396, + "learning_rate": 1.2768541275119271e-05, + "loss": 0.1591, + "step": 2944, + "teacher_loss": 0.14781039953231812 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.5312840342521667, + "learning_rate": 1.2772878415498049e-05, + "loss": 0.2639, + "step": 2945, + "teacher_loss": 0.23418551683425903 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.3973923921585083, + "learning_rate": 1.2777215555876825e-05, + "loss": 0.1599, + "step": 2946, + "teacher_loss": 0.13354821503162384 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.19833387434482574, + "learning_rate": 1.2781552696255602e-05, + "loss": 0.1875, + "step": 2947, + "teacher_loss": 0.18631184101104736 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.5400410294532776, + "learning_rate": 1.278588983663438e-05, + "loss": 0.2465, + "step": 2948, + "teacher_loss": 0.21387764811515808 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.7205885648727417, + "learning_rate": 1.2790226977013158e-05, + "loss": 0.2289, + "step": 2949, + "teacher_loss": 0.17426863312721252 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.42183375358581543, + "learning_rate": 1.2794564117391932e-05, + "loss": 0.2627, + "step": 2950, + "teacher_loss": 0.24499329924583435 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.2620997130870819, + "learning_rate": 1.279890125777071e-05, + "loss": 0.2044, + "step": 2951, + "teacher_loss": 0.1979558765888214 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.36855006217956543, + "learning_rate": 1.2803238398149487e-05, + "loss": 0.2096, + "step": 2952, + "teacher_loss": 0.19195443391799927 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.21986077725887299, + "learning_rate": 1.2807575538528265e-05, + "loss": 0.2023, + "step": 2953, + "teacher_loss": 0.20036692917346954 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.45694780349731445, + "learning_rate": 1.2811912678907042e-05, + "loss": 0.254, + "step": 2954, + "teacher_loss": 0.23141643404960632 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.49373775720596313, + "learning_rate": 1.2816249819285817e-05, + "loss": 0.2591, + "step": 2955, + "teacher_loss": 0.23300310969352722 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.47111624479293823, + "learning_rate": 1.2820586959664594e-05, + "loss": 0.2297, + "step": 2956, + "teacher_loss": 0.20282186567783356 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.8163066506385803, + "learning_rate": 1.2824924100043372e-05, + "loss": 0.373, + "step": 2957, + "teacher_loss": 0.3237210512161255 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.7496196031570435, + "learning_rate": 1.282926124042215e-05, + "loss": 0.2933, + "step": 2958, + "teacher_loss": 0.2425549030303955 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.45043647289276123, + "learning_rate": 1.2833598380800925e-05, + "loss": 0.282, + "step": 2959, + "teacher_loss": 0.26325374841690063 + }, + { + "compression_loss": 0.0, + "epoch": 0.53, + "label_loss": 0.695798397064209, + "learning_rate": 1.2837935521179703e-05, + "loss": 0.2972, + "step": 2960, + "teacher_loss": 0.25294405221939087 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.6354823708534241, + "learning_rate": 1.2842272661558479e-05, + "loss": 0.2207, + "step": 2961, + "teacher_loss": 0.17458616197109222 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.26955729722976685, + "learning_rate": 1.2846609801937257e-05, + "loss": 0.2109, + "step": 2962, + "teacher_loss": 0.20439890027046204 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.39085984230041504, + "learning_rate": 1.2850946942316033e-05, + "loss": 0.2908, + "step": 2963, + "teacher_loss": 0.27965545654296875 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.3037847876548767, + "learning_rate": 1.285528408269481e-05, + "loss": 0.1976, + "step": 2964, + "teacher_loss": 0.18583178520202637 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.5035691261291504, + "learning_rate": 1.2859621223073588e-05, + "loss": 0.2172, + "step": 2965, + "teacher_loss": 0.18540005385875702 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.1723177433013916, + "learning_rate": 1.2863958363452364e-05, + "loss": 0.1522, + "step": 2966, + "teacher_loss": 0.149988055229187 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.3799302875995636, + "learning_rate": 1.2868295503831141e-05, + "loss": 0.2547, + "step": 2967, + "teacher_loss": 0.24080045521259308 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.1720074862241745, + "learning_rate": 1.2872632644209917e-05, + "loss": 0.1863, + "step": 2968, + "teacher_loss": 0.18787409365177155 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.6693521738052368, + "learning_rate": 1.2876969784588695e-05, + "loss": 0.2875, + "step": 2969, + "teacher_loss": 0.24511204659938812 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.08292323350906372, + "learning_rate": 1.2881306924967473e-05, + "loss": 0.1411, + "step": 2970, + "teacher_loss": 0.14757077395915985 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.7695428133010864, + "learning_rate": 1.288564406534625e-05, + "loss": 0.5112, + "step": 2971, + "teacher_loss": 0.48253825306892395 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.419270783662796, + "learning_rate": 1.2889981205725024e-05, + "loss": 0.2384, + "step": 2972, + "teacher_loss": 0.21833764016628265 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.5678366422653198, + "learning_rate": 1.2894318346103802e-05, + "loss": 0.3143, + "step": 2973, + "teacher_loss": 0.2861189842224121 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.6132630705833435, + "learning_rate": 1.289865548648258e-05, + "loss": 0.5098, + "step": 2974, + "teacher_loss": 0.4983125329017639 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.4667607247829437, + "learning_rate": 1.2902992626861357e-05, + "loss": 0.2189, + "step": 2975, + "teacher_loss": 0.19137075543403625 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.29011255502700806, + "learning_rate": 1.2907329767240135e-05, + "loss": 0.2022, + "step": 2976, + "teacher_loss": 0.19248229265213013 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.5009244680404663, + "learning_rate": 1.2911666907618909e-05, + "loss": 0.2319, + "step": 2977, + "teacher_loss": 0.202016219496727 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.8742498159408569, + "learning_rate": 1.2916004047997687e-05, + "loss": 0.3398, + "step": 2978, + "teacher_loss": 0.28036707639694214 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.429987370967865, + "learning_rate": 1.2920341188376464e-05, + "loss": 0.2193, + "step": 2979, + "teacher_loss": 0.19584423303604126 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.4443659782409668, + "learning_rate": 1.2924678328755242e-05, + "loss": 0.2459, + "step": 2980, + "teacher_loss": 0.22390232980251312 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.5653756856918335, + "learning_rate": 1.2929015469134018e-05, + "loss": 0.255, + "step": 2981, + "teacher_loss": 0.22055259346961975 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.9079474210739136, + "learning_rate": 1.2933352609512794e-05, + "loss": 0.2699, + "step": 2982, + "teacher_loss": 0.1990591436624527 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.9636232852935791, + "learning_rate": 1.2937689749891571e-05, + "loss": 0.3, + "step": 2983, + "teacher_loss": 0.2263181209564209 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.9298241138458252, + "learning_rate": 1.2942026890270349e-05, + "loss": 0.4061, + "step": 2984, + "teacher_loss": 0.3479520082473755 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.795227587223053, + "learning_rate": 1.2946364030649125e-05, + "loss": 0.2828, + "step": 2985, + "teacher_loss": 0.22584865987300873 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.7138127088546753, + "learning_rate": 1.2950701171027903e-05, + "loss": 0.3002, + "step": 2986, + "teacher_loss": 0.2542181611061096 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.32672247290611267, + "learning_rate": 1.295503831140668e-05, + "loss": 0.182, + "step": 2987, + "teacher_loss": 0.16587606072425842 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.32905805110931396, + "learning_rate": 1.2959375451785456e-05, + "loss": 0.2375, + "step": 2988, + "teacher_loss": 0.22734344005584717 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.20300397276878357, + "learning_rate": 1.2963712592164234e-05, + "loss": 0.1999, + "step": 2989, + "teacher_loss": 0.19954471290111542 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.31182849407196045, + "learning_rate": 1.296804973254301e-05, + "loss": 0.276, + "step": 2990, + "teacher_loss": 0.27205315232276917 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.3289722800254822, + "learning_rate": 1.2972386872921787e-05, + "loss": 0.2052, + "step": 2991, + "teacher_loss": 0.19146430492401123 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.13437870144844055, + "learning_rate": 1.2976724013300565e-05, + "loss": 0.1561, + "step": 2992, + "teacher_loss": 0.15848934650421143 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.3751770555973053, + "learning_rate": 1.2981061153679341e-05, + "loss": 0.291, + "step": 2993, + "teacher_loss": 0.2816421389579773 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.43278443813323975, + "learning_rate": 1.2985398294058117e-05, + "loss": 0.316, + "step": 2994, + "teacher_loss": 0.3030541241168976 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.5744333267211914, + "learning_rate": 1.2989735434436894e-05, + "loss": 0.2895, + "step": 2995, + "teacher_loss": 0.2577863335609436 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.43087315559387207, + "learning_rate": 1.2994072574815672e-05, + "loss": 0.2658, + "step": 2996, + "teacher_loss": 0.24747687578201294 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.26595252752304077, + "learning_rate": 1.299840971519445e-05, + "loss": 0.1704, + "step": 2997, + "teacher_loss": 0.1597684770822525 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.7176780700683594, + "learning_rate": 1.3002746855573227e-05, + "loss": 0.3263, + "step": 2998, + "teacher_loss": 0.2828543782234192 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.4191933274269104, + "learning_rate": 1.3007083995952002e-05, + "loss": 0.2579, + "step": 2999, + "teacher_loss": 0.24002447724342346 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.6982427835464478, + "learning_rate": 1.3011421136330779e-05, + "loss": 0.2968, + "step": 3000, + "teacher_loss": 0.25222811102867126 + }, + { + "epoch": 0.54, + "eval_exact_match": 79.94323557237465, + "eval_f1": 87.31618470192804, + "step": 3000 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.2842622399330139, + "learning_rate": 1.3015758276709557e-05, + "loss": 0.2971, + "step": 3001, + "teacher_loss": 0.2985485792160034 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.30096569657325745, + "learning_rate": 1.3020095417088334e-05, + "loss": 0.1854, + "step": 3002, + "teacher_loss": 0.17260706424713135 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.2157330960035324, + "learning_rate": 1.302443255746711e-05, + "loss": 0.1485, + "step": 3003, + "teacher_loss": 0.1410660594701767 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.7136178612709045, + "learning_rate": 1.3028769697845886e-05, + "loss": 0.2387, + "step": 3004, + "teacher_loss": 0.18590174615383148 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.6043393611907959, + "learning_rate": 1.3033106838224664e-05, + "loss": 0.3809, + "step": 3005, + "teacher_loss": 0.35605260729789734 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.3761269152164459, + "learning_rate": 1.3037443978603441e-05, + "loss": 0.205, + "step": 3006, + "teacher_loss": 0.1859891414642334 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.6790250539779663, + "learning_rate": 1.3041781118982217e-05, + "loss": 0.3205, + "step": 3007, + "teacher_loss": 0.28062334656715393 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.25974640250205994, + "learning_rate": 1.3046118259360995e-05, + "loss": 0.2005, + "step": 3008, + "teacher_loss": 0.19389094412326813 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.4829614460468292, + "learning_rate": 1.3050455399739773e-05, + "loss": 0.2746, + "step": 3009, + "teacher_loss": 0.25148075819015503 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.5688278675079346, + "learning_rate": 1.3054792540118549e-05, + "loss": 0.2741, + "step": 3010, + "teacher_loss": 0.2413042187690735 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.4807244539260864, + "learning_rate": 1.3059129680497326e-05, + "loss": 0.3191, + "step": 3011, + "teacher_loss": 0.3011924624443054 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.5908994078636169, + "learning_rate": 1.3063466820876102e-05, + "loss": 0.2956, + "step": 3012, + "teacher_loss": 0.2627811133861542 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.3035929799079895, + "learning_rate": 1.306780396125488e-05, + "loss": 0.1878, + "step": 3013, + "teacher_loss": 0.17493954300880432 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.21108703315258026, + "learning_rate": 1.3072141101633657e-05, + "loss": 0.2615, + "step": 3014, + "teacher_loss": 0.2671399414539337 + }, + { + "compression_loss": 0.0, + "epoch": 0.54, + "label_loss": 0.10633372515439987, + "learning_rate": 1.3076478242012433e-05, + "loss": 0.169, + "step": 3015, + "teacher_loss": 0.17599095404148102 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.4860968589782715, + "learning_rate": 1.308081538239121e-05, + "loss": 0.2296, + "step": 3016, + "teacher_loss": 0.20113936066627502 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.3239452838897705, + "learning_rate": 1.3085152522769987e-05, + "loss": 0.2393, + "step": 3017, + "teacher_loss": 0.22985979914665222 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.7383592128753662, + "learning_rate": 1.3089489663148764e-05, + "loss": 0.258, + "step": 3018, + "teacher_loss": 0.20466801524162292 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.3263548016548157, + "learning_rate": 1.3093826803527542e-05, + "loss": 0.2129, + "step": 3019, + "teacher_loss": 0.20030061900615692 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.3775064945220947, + "learning_rate": 1.309816394390632e-05, + "loss": 0.1967, + "step": 3020, + "teacher_loss": 0.17665760219097137 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.5024935007095337, + "learning_rate": 1.3102501084285094e-05, + "loss": 0.2949, + "step": 3021, + "teacher_loss": 0.27180016040802 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.3055901825428009, + "learning_rate": 1.3106838224663872e-05, + "loss": 0.1727, + "step": 3022, + "teacher_loss": 0.15798087418079376 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.3656060993671417, + "learning_rate": 1.311117536504265e-05, + "loss": 0.2292, + "step": 3023, + "teacher_loss": 0.2140159159898758 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.3799038529396057, + "learning_rate": 1.3115512505421427e-05, + "loss": 0.2787, + "step": 3024, + "teacher_loss": 0.2675032615661621 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.21871453523635864, + "learning_rate": 1.3119849645800203e-05, + "loss": 0.2737, + "step": 3025, + "teacher_loss": 0.2798258066177368 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.30443406105041504, + "learning_rate": 1.3124186786178979e-05, + "loss": 0.1568, + "step": 3026, + "teacher_loss": 0.14043620228767395 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.2812073826789856, + "learning_rate": 1.3128523926557756e-05, + "loss": 0.1861, + "step": 3027, + "teacher_loss": 0.17554882168769836 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.23737600445747375, + "learning_rate": 1.3132861066936534e-05, + "loss": 0.1734, + "step": 3028, + "teacher_loss": 0.16628527641296387 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.37612712383270264, + "learning_rate": 1.313719820731531e-05, + "loss": 0.1981, + "step": 3029, + "teacher_loss": 0.1783452332019806 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.5463454723358154, + "learning_rate": 1.3141535347694087e-05, + "loss": 0.317, + "step": 3030, + "teacher_loss": 0.29153114557266235 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.2918059229850769, + "learning_rate": 1.3145872488072863e-05, + "loss": 0.1992, + "step": 3031, + "teacher_loss": 0.18888552486896515 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.21595440804958344, + "learning_rate": 1.3150209628451641e-05, + "loss": 0.1933, + "step": 3032, + "teacher_loss": 0.19076672196388245 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.6261675357818604, + "learning_rate": 1.3154546768830419e-05, + "loss": 0.2623, + "step": 3033, + "teacher_loss": 0.22182686626911163 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.5985780954360962, + "learning_rate": 1.3158883909209195e-05, + "loss": 0.2902, + "step": 3034, + "teacher_loss": 0.2559766173362732 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.3944920003414154, + "learning_rate": 1.3163221049587972e-05, + "loss": 0.2814, + "step": 3035, + "teacher_loss": 0.2687896490097046 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.6342904567718506, + "learning_rate": 1.316755818996675e-05, + "loss": 0.3017, + "step": 3036, + "teacher_loss": 0.2647266089916229 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.341489315032959, + "learning_rate": 1.3171895330345526e-05, + "loss": 0.2288, + "step": 3037, + "teacher_loss": 0.21629779040813446 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.37064623832702637, + "learning_rate": 1.3176232470724302e-05, + "loss": 0.3243, + "step": 3038, + "teacher_loss": 0.31920325756073 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.24229909479618073, + "learning_rate": 1.318056961110308e-05, + "loss": 0.1651, + "step": 3039, + "teacher_loss": 0.15657153725624084 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.20493543148040771, + "learning_rate": 1.3184906751481857e-05, + "loss": 0.2168, + "step": 3040, + "teacher_loss": 0.2180669754743576 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.4140383005142212, + "learning_rate": 1.3189243891860635e-05, + "loss": 0.2481, + "step": 3041, + "teacher_loss": 0.22965312004089355 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.6068315505981445, + "learning_rate": 1.319358103223941e-05, + "loss": 0.2632, + "step": 3042, + "teacher_loss": 0.22502192854881287 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.4939553141593933, + "learning_rate": 1.3197918172618186e-05, + "loss": 0.2376, + "step": 3043, + "teacher_loss": 0.20913271605968475 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.4232443571090698, + "learning_rate": 1.3202255312996964e-05, + "loss": 0.2619, + "step": 3044, + "teacher_loss": 0.24401313066482544 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.4562731981277466, + "learning_rate": 1.3206592453375742e-05, + "loss": 0.2871, + "step": 3045, + "teacher_loss": 0.2682906985282898 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.2555849552154541, + "learning_rate": 1.321092959375452e-05, + "loss": 0.2005, + "step": 3046, + "teacher_loss": 0.1944049447774887 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.26248323917388916, + "learning_rate": 1.3215266734133295e-05, + "loss": 0.2798, + "step": 3047, + "teacher_loss": 0.28168004751205444 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.4334351420402527, + "learning_rate": 1.3219603874512071e-05, + "loss": 0.2543, + "step": 3048, + "teacher_loss": 0.23443761467933655 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.3107101023197174, + "learning_rate": 1.3223941014890849e-05, + "loss": 0.1804, + "step": 3049, + "teacher_loss": 0.16587281227111816 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.27195990085601807, + "learning_rate": 1.3228278155269626e-05, + "loss": 0.2028, + "step": 3050, + "teacher_loss": 0.1951400339603424 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.14790582656860352, + "learning_rate": 1.3232615295648402e-05, + "loss": 0.1787, + "step": 3051, + "teacher_loss": 0.1821078360080719 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.6512551307678223, + "learning_rate": 1.323695243602718e-05, + "loss": 0.418, + "step": 3052, + "teacher_loss": 0.39210885763168335 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.26082876324653625, + "learning_rate": 1.3241289576405956e-05, + "loss": 0.2952, + "step": 3053, + "teacher_loss": 0.29897040128707886 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.6199743747711182, + "learning_rate": 1.3245626716784733e-05, + "loss": 0.2705, + "step": 3054, + "teacher_loss": 0.2316872775554657 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.3557511568069458, + "learning_rate": 1.3249963857163511e-05, + "loss": 0.2101, + "step": 3055, + "teacher_loss": 0.19395655393600464 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.20252348482608795, + "learning_rate": 1.3254300997542287e-05, + "loss": 0.1899, + "step": 3056, + "teacher_loss": 0.18853795528411865 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.3717423975467682, + "learning_rate": 1.3258638137921065e-05, + "loss": 0.2233, + "step": 3057, + "teacher_loss": 0.2068050503730774 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.30125415325164795, + "learning_rate": 1.3262975278299842e-05, + "loss": 0.252, + "step": 3058, + "teacher_loss": 0.24657300114631653 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.2846919894218445, + "learning_rate": 1.3267312418678618e-05, + "loss": 0.236, + "step": 3059, + "teacher_loss": 0.23061969876289368 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.27283358573913574, + "learning_rate": 1.3271649559057394e-05, + "loss": 0.2082, + "step": 3060, + "teacher_loss": 0.20102459192276 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.27168700098991394, + "learning_rate": 1.3275986699436172e-05, + "loss": 0.2267, + "step": 3061, + "teacher_loss": 0.22175222635269165 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.13136842846870422, + "learning_rate": 1.328032383981495e-05, + "loss": 0.1728, + "step": 3062, + "teacher_loss": 0.17739540338516235 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.4940868616104126, + "learning_rate": 1.3284660980193727e-05, + "loss": 0.2778, + "step": 3063, + "teacher_loss": 0.25375664234161377 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.1723693311214447, + "learning_rate": 1.3288998120572503e-05, + "loss": 0.1601, + "step": 3064, + "teacher_loss": 0.15872237086296082 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.2709307074546814, + "learning_rate": 1.3293335260951279e-05, + "loss": 0.1732, + "step": 3065, + "teacher_loss": 0.16239234805107117 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.5543541312217712, + "learning_rate": 1.3297672401330056e-05, + "loss": 0.2946, + "step": 3066, + "teacher_loss": 0.26571282744407654 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.5398831367492676, + "learning_rate": 1.3302009541708834e-05, + "loss": 0.2352, + "step": 3067, + "teacher_loss": 0.20139722526073456 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.5789247751235962, + "learning_rate": 1.3306346682087612e-05, + "loss": 0.2728, + "step": 3068, + "teacher_loss": 0.23879292607307434 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.5951073169708252, + "learning_rate": 1.3310683822466388e-05, + "loss": 0.2471, + "step": 3069, + "teacher_loss": 0.20844855904579163 + }, + { + "compression_loss": 0.0, + "epoch": 0.55, + "label_loss": 0.487831711769104, + "learning_rate": 1.3315020962845164e-05, + "loss": 0.2619, + "step": 3070, + "teacher_loss": 0.23681041598320007 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.7076802849769592, + "learning_rate": 1.3319358103223941e-05, + "loss": 0.2653, + "step": 3071, + "teacher_loss": 0.2161283940076828 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.7985213994979858, + "learning_rate": 1.3323695243602719e-05, + "loss": 0.3906, + "step": 3072, + "teacher_loss": 0.3452828824520111 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.4687846302986145, + "learning_rate": 1.3328032383981495e-05, + "loss": 0.2227, + "step": 3073, + "teacher_loss": 0.19536671042442322 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.35332876443862915, + "learning_rate": 1.3332369524360272e-05, + "loss": 0.1849, + "step": 3074, + "teacher_loss": 0.1662396490573883 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.4023820161819458, + "learning_rate": 1.3336706664739048e-05, + "loss": 0.2046, + "step": 3075, + "teacher_loss": 0.18261641263961792 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.27179956436157227, + "learning_rate": 1.3341043805117826e-05, + "loss": 0.2106, + "step": 3076, + "teacher_loss": 0.2037510722875595 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.48693007230758667, + "learning_rate": 1.3345380945496604e-05, + "loss": 0.2661, + "step": 3077, + "teacher_loss": 0.2415974885225296 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.6925801038742065, + "learning_rate": 1.334971808587538e-05, + "loss": 0.2513, + "step": 3078, + "teacher_loss": 0.2022903561592102 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.14765048027038574, + "learning_rate": 1.3354055226254157e-05, + "loss": 0.1714, + "step": 3079, + "teacher_loss": 0.17400771379470825 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.681926965713501, + "learning_rate": 1.3358392366632933e-05, + "loss": 0.2991, + "step": 3080, + "teacher_loss": 0.2566143274307251 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.2558896541595459, + "learning_rate": 1.336272950701171e-05, + "loss": 0.2099, + "step": 3081, + "teacher_loss": 0.20477989315986633 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.2703326940536499, + "learning_rate": 1.3367066647390487e-05, + "loss": 0.1562, + "step": 3082, + "teacher_loss": 0.14357003569602966 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.36100858449935913, + "learning_rate": 1.3371403787769264e-05, + "loss": 0.2037, + "step": 3083, + "teacher_loss": 0.1861998438835144 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.4562470614910126, + "learning_rate": 1.3375740928148042e-05, + "loss": 0.2177, + "step": 3084, + "teacher_loss": 0.19119438529014587 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.34820556640625, + "learning_rate": 1.338007806852682e-05, + "loss": 0.1766, + "step": 3085, + "teacher_loss": 0.1574985235929489 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.2374558448791504, + "learning_rate": 1.3384415208905595e-05, + "loss": 0.163, + "step": 3086, + "teacher_loss": 0.15469534695148468 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.2202252447605133, + "learning_rate": 1.3388752349284371e-05, + "loss": 0.2416, + "step": 3087, + "teacher_loss": 0.2439364194869995 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.49908357858657837, + "learning_rate": 1.3393089489663149e-05, + "loss": 0.2725, + "step": 3088, + "teacher_loss": 0.24732965230941772 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.8662938475608826, + "learning_rate": 1.3397426630041927e-05, + "loss": 0.38, + "step": 3089, + "teacher_loss": 0.3260199725627899 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.14701521396636963, + "learning_rate": 1.3401763770420704e-05, + "loss": 0.1682, + "step": 3090, + "teacher_loss": 0.170506089925766 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.2729068100452423, + "learning_rate": 1.3406100910799478e-05, + "loss": 0.1817, + "step": 3091, + "teacher_loss": 0.171578049659729 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.7264319658279419, + "learning_rate": 1.3410438051178256e-05, + "loss": 0.3385, + "step": 3092, + "teacher_loss": 0.2953737676143646 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.44986459612846375, + "learning_rate": 1.3414775191557034e-05, + "loss": 0.228, + "step": 3093, + "teacher_loss": 0.2033209204673767 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.8176664113998413, + "learning_rate": 1.3419112331935811e-05, + "loss": 0.2287, + "step": 3094, + "teacher_loss": 0.16329774260520935 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.36958047747612, + "learning_rate": 1.3423449472314589e-05, + "loss": 0.1774, + "step": 3095, + "teacher_loss": 0.15599533915519714 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.8095451593399048, + "learning_rate": 1.3427786612693365e-05, + "loss": 0.3054, + "step": 3096, + "teacher_loss": 0.24937686324119568 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.31955987215042114, + "learning_rate": 1.343212375307214e-05, + "loss": 0.1962, + "step": 3097, + "teacher_loss": 0.18253986537456512 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.3793798089027405, + "learning_rate": 1.3436460893450918e-05, + "loss": 0.2852, + "step": 3098, + "teacher_loss": 0.2747170031070709 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.2132205069065094, + "learning_rate": 1.3440798033829696e-05, + "loss": 0.1987, + "step": 3099, + "teacher_loss": 0.19707725942134857 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.7296171188354492, + "learning_rate": 1.3445135174208472e-05, + "loss": 0.2495, + "step": 3100, + "teacher_loss": 0.196100652217865 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.633063793182373, + "learning_rate": 1.344947231458725e-05, + "loss": 0.2454, + "step": 3101, + "teacher_loss": 0.20230242609977722 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.25027191638946533, + "learning_rate": 1.3453809454966025e-05, + "loss": 0.3152, + "step": 3102, + "teacher_loss": 0.32245850563049316 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.29171448945999146, + "learning_rate": 1.3458146595344803e-05, + "loss": 0.2663, + "step": 3103, + "teacher_loss": 0.26343053579330444 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.2954946756362915, + "learning_rate": 1.3462483735723579e-05, + "loss": 0.2435, + "step": 3104, + "teacher_loss": 0.2377127707004547 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.5202397108078003, + "learning_rate": 1.3466820876102357e-05, + "loss": 0.2237, + "step": 3105, + "teacher_loss": 0.19072537124156952 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.7288155555725098, + "learning_rate": 1.3471158016481134e-05, + "loss": 0.2913, + "step": 3106, + "teacher_loss": 0.24267855286598206 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.3162734806537628, + "learning_rate": 1.3475495156859912e-05, + "loss": 0.2385, + "step": 3107, + "teacher_loss": 0.22986571490764618 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.8069810271263123, + "learning_rate": 1.3479832297238688e-05, + "loss": 0.2565, + "step": 3108, + "teacher_loss": 0.19535022974014282 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.41321685910224915, + "learning_rate": 1.3484169437617464e-05, + "loss": 0.1877, + "step": 3109, + "teacher_loss": 0.16265472769737244 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.26473525166511536, + "learning_rate": 1.3488506577996241e-05, + "loss": 0.1824, + "step": 3110, + "teacher_loss": 0.1732155680656433 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.3609674572944641, + "learning_rate": 1.3492843718375019e-05, + "loss": 0.2285, + "step": 3111, + "teacher_loss": 0.21373626589775085 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.26164525747299194, + "learning_rate": 1.3497180858753797e-05, + "loss": 0.1918, + "step": 3112, + "teacher_loss": 0.184035524725914 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.3896588087081909, + "learning_rate": 1.350151799913257e-05, + "loss": 0.2141, + "step": 3113, + "teacher_loss": 0.19460904598236084 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.5366600751876831, + "learning_rate": 1.3505855139511348e-05, + "loss": 0.2662, + "step": 3114, + "teacher_loss": 0.2361549735069275 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.26788294315338135, + "learning_rate": 1.3510192279890126e-05, + "loss": 0.19, + "step": 3115, + "teacher_loss": 0.18137764930725098 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.07358293980360031, + "learning_rate": 1.3514529420268904e-05, + "loss": 0.1957, + "step": 3116, + "teacher_loss": 0.2093045711517334 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.5157208442687988, + "learning_rate": 1.3518866560647681e-05, + "loss": 0.2848, + "step": 3117, + "teacher_loss": 0.25917455554008484 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.21517476439476013, + "learning_rate": 1.3523203701026457e-05, + "loss": 0.3121, + "step": 3118, + "teacher_loss": 0.3228638470172882 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.16703195869922638, + "learning_rate": 1.3527540841405233e-05, + "loss": 0.2099, + "step": 3119, + "teacher_loss": 0.21471600234508514 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.2685527801513672, + "learning_rate": 1.353187798178401e-05, + "loss": 0.2036, + "step": 3120, + "teacher_loss": 0.1963837742805481 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.2674502432346344, + "learning_rate": 1.3536215122162788e-05, + "loss": 0.2638, + "step": 3121, + "teacher_loss": 0.26343005895614624 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.45980197191238403, + "learning_rate": 1.3540552262541564e-05, + "loss": 0.3683, + "step": 3122, + "teacher_loss": 0.35817086696624756 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 1.1843032836914062, + "learning_rate": 1.3544889402920342e-05, + "loss": 0.3411, + "step": 3123, + "teacher_loss": 0.24737712740898132 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.3766411542892456, + "learning_rate": 1.3549226543299118e-05, + "loss": 0.2733, + "step": 3124, + "teacher_loss": 0.2618699073791504 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.874194860458374, + "learning_rate": 1.3553563683677896e-05, + "loss": 0.2646, + "step": 3125, + "teacher_loss": 0.1969054490327835 + }, + { + "compression_loss": 0.0, + "epoch": 0.56, + "label_loss": 0.36310744285583496, + "learning_rate": 1.3557900824056671e-05, + "loss": 0.2188, + "step": 3126, + "teacher_loss": 0.20271845161914825 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.4812440872192383, + "learning_rate": 1.3562237964435449e-05, + "loss": 0.1968, + "step": 3127, + "teacher_loss": 0.1652371883392334 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.20630908012390137, + "learning_rate": 1.3566575104814227e-05, + "loss": 0.2092, + "step": 3128, + "teacher_loss": 0.20946836471557617 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.38478463888168335, + "learning_rate": 1.3570912245193003e-05, + "loss": 0.2139, + "step": 3129, + "teacher_loss": 0.19489642977714539 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.3425915539264679, + "learning_rate": 1.357524938557178e-05, + "loss": 0.2832, + "step": 3130, + "teacher_loss": 0.27660250663757324 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.9745980501174927, + "learning_rate": 1.3579586525950556e-05, + "loss": 0.2874, + "step": 3131, + "teacher_loss": 0.21106813848018646 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.5787369608879089, + "learning_rate": 1.3583923666329334e-05, + "loss": 0.2073, + "step": 3132, + "teacher_loss": 0.16603747010231018 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.6792896389961243, + "learning_rate": 1.3588260806708111e-05, + "loss": 0.2626, + "step": 3133, + "teacher_loss": 0.21633067727088928 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.3057715892791748, + "learning_rate": 1.3592597947086889e-05, + "loss": 0.1739, + "step": 3134, + "teacher_loss": 0.15922175347805023 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.49490582942962646, + "learning_rate": 1.3596935087465663e-05, + "loss": 0.3221, + "step": 3135, + "teacher_loss": 0.3028768301010132 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.30658990144729614, + "learning_rate": 1.3601272227844441e-05, + "loss": 0.2477, + "step": 3136, + "teacher_loss": 0.24117395281791687 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.24417918920516968, + "learning_rate": 1.3605609368223218e-05, + "loss": 0.2684, + "step": 3137, + "teacher_loss": 0.27106496691703796 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.6911215782165527, + "learning_rate": 1.3609946508601996e-05, + "loss": 0.2589, + "step": 3138, + "teacher_loss": 0.2108924835920334 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.2780971825122833, + "learning_rate": 1.3614283648980774e-05, + "loss": 0.2289, + "step": 3139, + "teacher_loss": 0.22347579896450043 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.35225605964660645, + "learning_rate": 1.3618620789359548e-05, + "loss": 0.3281, + "step": 3140, + "teacher_loss": 0.3254607617855072 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.529262900352478, + "learning_rate": 1.3622957929738326e-05, + "loss": 0.2421, + "step": 3141, + "teacher_loss": 0.21015891432762146 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.303768128156662, + "learning_rate": 1.3627295070117103e-05, + "loss": 0.173, + "step": 3142, + "teacher_loss": 0.15846604108810425 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.21878615021705627, + "learning_rate": 1.363163221049588e-05, + "loss": 0.1914, + "step": 3143, + "teacher_loss": 0.18833668529987335 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.2296508252620697, + "learning_rate": 1.3635969350874657e-05, + "loss": 0.1651, + "step": 3144, + "teacher_loss": 0.15793374180793762 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.48295897245407104, + "learning_rate": 1.3640306491253434e-05, + "loss": 0.2587, + "step": 3145, + "teacher_loss": 0.23380841314792633 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.8142374753952026, + "learning_rate": 1.364464363163221e-05, + "loss": 0.3286, + "step": 3146, + "teacher_loss": 0.27459973096847534 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.45030349493026733, + "learning_rate": 1.3648980772010988e-05, + "loss": 0.2732, + "step": 3147, + "teacher_loss": 0.2535628080368042 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.5232150554656982, + "learning_rate": 1.3653317912389764e-05, + "loss": 0.2784, + "step": 3148, + "teacher_loss": 0.2511853575706482 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.31167078018188477, + "learning_rate": 1.3657655052768541e-05, + "loss": 0.2377, + "step": 3149, + "teacher_loss": 0.22949160635471344 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.15156973898410797, + "learning_rate": 1.3661992193147319e-05, + "loss": 0.2326, + "step": 3150, + "teacher_loss": 0.24163369834423065 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.3795466423034668, + "learning_rate": 1.3666329333526095e-05, + "loss": 0.2285, + "step": 3151, + "teacher_loss": 0.2116793692111969 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.32396993041038513, + "learning_rate": 1.3670666473904873e-05, + "loss": 0.2179, + "step": 3152, + "teacher_loss": 0.20613354444503784 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.600192666053772, + "learning_rate": 1.3675003614283649e-05, + "loss": 0.2238, + "step": 3153, + "teacher_loss": 0.1819387674331665 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.597110390663147, + "learning_rate": 1.3679340754662426e-05, + "loss": 0.3018, + "step": 3154, + "teacher_loss": 0.2690383791923523 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.5059059262275696, + "learning_rate": 1.3683677895041204e-05, + "loss": 0.2716, + "step": 3155, + "teacher_loss": 0.24560335278511047 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 1.1677502393722534, + "learning_rate": 1.3688015035419981e-05, + "loss": 0.4041, + "step": 3156, + "teacher_loss": 0.3192659914493561 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.518955647945404, + "learning_rate": 1.3692352175798756e-05, + "loss": 0.2569, + "step": 3157, + "teacher_loss": 0.2277565896511078 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 1.4068934917449951, + "learning_rate": 1.3696689316177533e-05, + "loss": 0.2939, + "step": 3158, + "teacher_loss": 0.17018303275108337 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.2963440418243408, + "learning_rate": 1.3701026456556311e-05, + "loss": 0.2047, + "step": 3159, + "teacher_loss": 0.19454213976860046 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.2932714819908142, + "learning_rate": 1.3705363596935089e-05, + "loss": 0.172, + "step": 3160, + "teacher_loss": 0.15849372744560242 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.4205343723297119, + "learning_rate": 1.3709700737313866e-05, + "loss": 0.243, + "step": 3161, + "teacher_loss": 0.22322650253772736 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.42705249786376953, + "learning_rate": 1.371403787769264e-05, + "loss": 0.3046, + "step": 3162, + "teacher_loss": 0.29094648361206055 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.5534102916717529, + "learning_rate": 1.3718375018071418e-05, + "loss": 0.2744, + "step": 3163, + "teacher_loss": 0.2434028685092926 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.24196718633174896, + "learning_rate": 1.3722712158450196e-05, + "loss": 0.1735, + "step": 3164, + "teacher_loss": 0.16590993106365204 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.1688467413187027, + "learning_rate": 1.3727049298828973e-05, + "loss": 0.1352, + "step": 3165, + "teacher_loss": 0.1314394772052765 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.7563605904579163, + "learning_rate": 1.373138643920775e-05, + "loss": 0.472, + "step": 3166, + "teacher_loss": 0.4404444396495819 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.436075896024704, + "learning_rate": 1.3735723579586527e-05, + "loss": 0.2833, + "step": 3167, + "teacher_loss": 0.26631230115890503 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.4914531111717224, + "learning_rate": 1.3740060719965303e-05, + "loss": 0.2927, + "step": 3168, + "teacher_loss": 0.2706148624420166 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.44022417068481445, + "learning_rate": 1.374439786034408e-05, + "loss": 0.3839, + "step": 3169, + "teacher_loss": 0.37767693400382996 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.29825326800346375, + "learning_rate": 1.3748735000722856e-05, + "loss": 0.2196, + "step": 3170, + "teacher_loss": 0.21085938811302185 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.5327640175819397, + "learning_rate": 1.3753072141101634e-05, + "loss": 0.2661, + "step": 3171, + "teacher_loss": 0.23651134967803955 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.3903202414512634, + "learning_rate": 1.3757409281480412e-05, + "loss": 0.203, + "step": 3172, + "teacher_loss": 0.18213404715061188 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.32969826459884644, + "learning_rate": 1.3761746421859187e-05, + "loss": 0.3237, + "step": 3173, + "teacher_loss": 0.323083758354187 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.3849000632762909, + "learning_rate": 1.3766083562237965e-05, + "loss": 0.2525, + "step": 3174, + "teacher_loss": 0.23778298497200012 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.3668939769268036, + "learning_rate": 1.3770420702616741e-05, + "loss": 0.2593, + "step": 3175, + "teacher_loss": 0.24739089608192444 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.3109775185585022, + "learning_rate": 1.3774757842995519e-05, + "loss": 0.3024, + "step": 3176, + "teacher_loss": 0.3014335632324219 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.2276211380958557, + "learning_rate": 1.3779094983374296e-05, + "loss": 0.205, + "step": 3177, + "teacher_loss": 0.20243845880031586 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.2896766662597656, + "learning_rate": 1.3783432123753072e-05, + "loss": 0.2031, + "step": 3178, + "teacher_loss": 0.19349798560142517 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.26275524497032166, + "learning_rate": 1.3787769264131848e-05, + "loss": 0.2082, + "step": 3179, + "teacher_loss": 0.20211520791053772 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.5010784268379211, + "learning_rate": 1.3792106404510626e-05, + "loss": 0.225, + "step": 3180, + "teacher_loss": 0.19430626928806305 + }, + { + "compression_loss": 0.0, + "epoch": 0.57, + "label_loss": 0.5018423795700073, + "learning_rate": 1.3796443544889403e-05, + "loss": 0.2318, + "step": 3181, + "teacher_loss": 0.2017618864774704 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.43978336453437805, + "learning_rate": 1.3800780685268181e-05, + "loss": 0.2633, + "step": 3182, + "teacher_loss": 0.24363601207733154 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.5357323288917542, + "learning_rate": 1.3805117825646959e-05, + "loss": 0.205, + "step": 3183, + "teacher_loss": 0.16820865869522095 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.4715900421142578, + "learning_rate": 1.3809454966025733e-05, + "loss": 0.3507, + "step": 3184, + "teacher_loss": 0.3372383713722229 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.6615829467773438, + "learning_rate": 1.381379210640451e-05, + "loss": 0.2678, + "step": 3185, + "teacher_loss": 0.2240346372127533 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.8390197157859802, + "learning_rate": 1.3818129246783288e-05, + "loss": 0.335, + "step": 3186, + "teacher_loss": 0.2789629101753235 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.5573806762695312, + "learning_rate": 1.3822466387162066e-05, + "loss": 0.2952, + "step": 3187, + "teacher_loss": 0.2661147117614746 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.5123276710510254, + "learning_rate": 1.3826803527540842e-05, + "loss": 0.2683, + "step": 3188, + "teacher_loss": 0.2411530762910843 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.9935404062271118, + "learning_rate": 1.3831140667919618e-05, + "loss": 0.3157, + "step": 3189, + "teacher_loss": 0.24041594564914703 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.31871193647384644, + "learning_rate": 1.3835477808298395e-05, + "loss": 0.2388, + "step": 3190, + "teacher_loss": 0.22993820905685425 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.15627586841583252, + "learning_rate": 1.3839814948677173e-05, + "loss": 0.1539, + "step": 3191, + "teacher_loss": 0.15366145968437195 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.41014066338539124, + "learning_rate": 1.3844152089055949e-05, + "loss": 0.1942, + "step": 3192, + "teacher_loss": 0.17022094130516052 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.7192935347557068, + "learning_rate": 1.3848489229434726e-05, + "loss": 0.3478, + "step": 3193, + "teacher_loss": 0.30654144287109375 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.741270124912262, + "learning_rate": 1.3852826369813504e-05, + "loss": 0.3203, + "step": 3194, + "teacher_loss": 0.273525208234787 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.5931705832481384, + "learning_rate": 1.385716351019228e-05, + "loss": 0.3113, + "step": 3195, + "teacher_loss": 0.2799602448940277 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.275505930185318, + "learning_rate": 1.3861500650571058e-05, + "loss": 0.1604, + "step": 3196, + "teacher_loss": 0.14755554497241974 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.2299749255180359, + "learning_rate": 1.3865837790949833e-05, + "loss": 0.1712, + "step": 3197, + "teacher_loss": 0.16462913155555725 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.3422144651412964, + "learning_rate": 1.3870174931328611e-05, + "loss": 0.1901, + "step": 3198, + "teacher_loss": 0.17318254709243774 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.47733044624328613, + "learning_rate": 1.3874512071707389e-05, + "loss": 0.3827, + "step": 3199, + "teacher_loss": 0.37215638160705566 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.37766045331954956, + "learning_rate": 1.3878849212086165e-05, + "loss": 0.2265, + "step": 3200, + "teacher_loss": 0.20968547463417053 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.2449851632118225, + "learning_rate": 1.388318635246494e-05, + "loss": 0.1866, + "step": 3201, + "teacher_loss": 0.1801028549671173 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.20897750556468964, + "learning_rate": 1.3887523492843718e-05, + "loss": 0.2275, + "step": 3202, + "teacher_loss": 0.22951540350914001 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.574865460395813, + "learning_rate": 1.3891860633222496e-05, + "loss": 0.2897, + "step": 3203, + "teacher_loss": 0.25800028443336487 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.5864170789718628, + "learning_rate": 1.3896197773601273e-05, + "loss": 0.1943, + "step": 3204, + "teacher_loss": 0.15075168013572693 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.5868630409240723, + "learning_rate": 1.3900534913980051e-05, + "loss": 0.3765, + "step": 3205, + "teacher_loss": 0.35307732224464417 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.32759028673171997, + "learning_rate": 1.3904872054358825e-05, + "loss": 0.2724, + "step": 3206, + "teacher_loss": 0.2662566304206848 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.46525177359580994, + "learning_rate": 1.3909209194737603e-05, + "loss": 0.2294, + "step": 3207, + "teacher_loss": 0.20324186980724335 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.30805495381355286, + "learning_rate": 1.391354633511638e-05, + "loss": 0.1859, + "step": 3208, + "teacher_loss": 0.17229008674621582 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.5878300666809082, + "learning_rate": 1.3917883475495158e-05, + "loss": 0.5055, + "step": 3209, + "teacher_loss": 0.4963799715042114 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.4832676351070404, + "learning_rate": 1.3922220615873934e-05, + "loss": 0.3259, + "step": 3210, + "teacher_loss": 0.30845046043395996 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.1943540722131729, + "learning_rate": 1.392655775625271e-05, + "loss": 0.1747, + "step": 3211, + "teacher_loss": 0.17246964573860168 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.40365171432495117, + "learning_rate": 1.3930894896631488e-05, + "loss": 0.2419, + "step": 3212, + "teacher_loss": 0.2238958477973938 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.2506798505783081, + "learning_rate": 1.3935232037010265e-05, + "loss": 0.2503, + "step": 3213, + "teacher_loss": 0.250217080116272 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.20932012796401978, + "learning_rate": 1.3939569177389041e-05, + "loss": 0.2078, + "step": 3214, + "teacher_loss": 0.20760831236839294 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.6060541272163391, + "learning_rate": 1.3943906317767819e-05, + "loss": 0.3163, + "step": 3215, + "teacher_loss": 0.28407227993011475 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.5119735598564148, + "learning_rate": 1.3948243458146596e-05, + "loss": 0.2351, + "step": 3216, + "teacher_loss": 0.20438314974308014 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.25984299182891846, + "learning_rate": 1.3952580598525372e-05, + "loss": 0.2145, + "step": 3217, + "teacher_loss": 0.2094123363494873 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.3918399512767792, + "learning_rate": 1.395691773890415e-05, + "loss": 0.2248, + "step": 3218, + "teacher_loss": 0.20627890527248383 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.8759027719497681, + "learning_rate": 1.3961254879282926e-05, + "loss": 0.4759, + "step": 3219, + "teacher_loss": 0.43150097131729126 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.6954171657562256, + "learning_rate": 1.3965592019661704e-05, + "loss": 0.3354, + "step": 3220, + "teacher_loss": 0.2953682839870453 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.436975359916687, + "learning_rate": 1.3969929160040481e-05, + "loss": 0.2817, + "step": 3221, + "teacher_loss": 0.2644992470741272 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.41084232926368713, + "learning_rate": 1.3974266300419257e-05, + "loss": 0.1883, + "step": 3222, + "teacher_loss": 0.16359253227710724 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.33008110523223877, + "learning_rate": 1.3978603440798033e-05, + "loss": 0.1844, + "step": 3223, + "teacher_loss": 0.16815996170043945 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.45917463302612305, + "learning_rate": 1.398294058117681e-05, + "loss": 0.3418, + "step": 3224, + "teacher_loss": 0.32876139879226685 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.37146615982055664, + "learning_rate": 1.3987277721555588e-05, + "loss": 0.247, + "step": 3225, + "teacher_loss": 0.2331475019454956 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.5741246342658997, + "learning_rate": 1.3991614861934366e-05, + "loss": 0.3961, + "step": 3226, + "teacher_loss": 0.37629234790802 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.46848800778388977, + "learning_rate": 1.3995952002313142e-05, + "loss": 0.2355, + "step": 3227, + "teacher_loss": 0.20961040258407593 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 1.062383770942688, + "learning_rate": 1.4000289142691918e-05, + "loss": 0.477, + "step": 3228, + "teacher_loss": 0.4119214415550232 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.6475208401679993, + "learning_rate": 1.4004626283070695e-05, + "loss": 0.2255, + "step": 3229, + "teacher_loss": 0.17866329848766327 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.23250412940979004, + "learning_rate": 1.4008963423449473e-05, + "loss": 0.239, + "step": 3230, + "teacher_loss": 0.23974217474460602 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.48672613501548767, + "learning_rate": 1.401330056382825e-05, + "loss": 0.2313, + "step": 3231, + "teacher_loss": 0.2028699815273285 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.5655722618103027, + "learning_rate": 1.4017637704207027e-05, + "loss": 0.2356, + "step": 3232, + "teacher_loss": 0.19898763298988342 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.5639442205429077, + "learning_rate": 1.4021974844585802e-05, + "loss": 0.3068, + "step": 3233, + "teacher_loss": 0.27822068333625793 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.571675717830658, + "learning_rate": 1.402631198496458e-05, + "loss": 0.2412, + "step": 3234, + "teacher_loss": 0.2044321596622467 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.4403180480003357, + "learning_rate": 1.4030649125343358e-05, + "loss": 0.2156, + "step": 3235, + "teacher_loss": 0.19065716862678528 + }, + { + "compression_loss": 0.0, + "epoch": 0.58, + "label_loss": 0.504184365272522, + "learning_rate": 1.4034986265722135e-05, + "loss": 0.2927, + "step": 3236, + "teacher_loss": 0.2692336440086365 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.5931829810142517, + "learning_rate": 1.4039323406100911e-05, + "loss": 0.3365, + "step": 3237, + "teacher_loss": 0.30800577998161316 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.16033734381198883, + "learning_rate": 1.4043660546479687e-05, + "loss": 0.173, + "step": 3238, + "teacher_loss": 0.17442336678504944 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.39114317297935486, + "learning_rate": 1.4047997686858465e-05, + "loss": 0.2597, + "step": 3239, + "teacher_loss": 0.24505002796649933 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.6499992609024048, + "learning_rate": 1.4052334827237242e-05, + "loss": 0.2077, + "step": 3240, + "teacher_loss": 0.1585073173046112 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.4508711099624634, + "learning_rate": 1.4056671967616018e-05, + "loss": 0.2465, + "step": 3241, + "teacher_loss": 0.22374692559242249 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.39568930864334106, + "learning_rate": 1.4061009107994796e-05, + "loss": 0.2125, + "step": 3242, + "teacher_loss": 0.19212275743484497 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.673302173614502, + "learning_rate": 1.4065346248373574e-05, + "loss": 0.4002, + "step": 3243, + "teacher_loss": 0.3698629140853882 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.33611422777175903, + "learning_rate": 1.406968338875235e-05, + "loss": 0.3088, + "step": 3244, + "teacher_loss": 0.30579808354377747 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.24481135606765747, + "learning_rate": 1.4074020529131125e-05, + "loss": 0.1895, + "step": 3245, + "teacher_loss": 0.18340739607810974 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.34882795810699463, + "learning_rate": 1.4078357669509903e-05, + "loss": 0.2426, + "step": 3246, + "teacher_loss": 0.2307722270488739 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.5321573615074158, + "learning_rate": 1.408269480988868e-05, + "loss": 0.3509, + "step": 3247, + "teacher_loss": 0.3308143615722656 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.4466513395309448, + "learning_rate": 1.4087031950267458e-05, + "loss": 0.2673, + "step": 3248, + "teacher_loss": 0.24736738204956055 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.60186767578125, + "learning_rate": 1.4091369090646234e-05, + "loss": 0.348, + "step": 3249, + "teacher_loss": 0.3197728991508484 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.32665306329727173, + "learning_rate": 1.409570623102501e-05, + "loss": 0.1722, + "step": 3250, + "teacher_loss": 0.15509089827537537 + }, + { + "epoch": 0.59, + "eval_exact_match": 79.36613055818354, + "eval_f1": 86.82463777405437, + "step": 3250 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.648729681968689, + "learning_rate": 1.4100043371403788e-05, + "loss": 0.246, + "step": 3251, + "teacher_loss": 0.20126372575759888 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.87117600440979, + "learning_rate": 1.4104380511782565e-05, + "loss": 0.2952, + "step": 3252, + "teacher_loss": 0.23118405044078827 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.7060363292694092, + "learning_rate": 1.4108717652161343e-05, + "loss": 0.232, + "step": 3253, + "teacher_loss": 0.1793704628944397 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 1.0135180950164795, + "learning_rate": 1.4113054792540119e-05, + "loss": 0.542, + "step": 3254, + "teacher_loss": 0.48965102434158325 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.23564046621322632, + "learning_rate": 1.4117391932918895e-05, + "loss": 0.2493, + "step": 3255, + "teacher_loss": 0.2507632374763489 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.4256027340888977, + "learning_rate": 1.4121729073297673e-05, + "loss": 0.1767, + "step": 3256, + "teacher_loss": 0.1490747183561325 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.43884116411209106, + "learning_rate": 1.412606621367645e-05, + "loss": 0.1902, + "step": 3257, + "teacher_loss": 0.16257108747959137 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.2848835289478302, + "learning_rate": 1.4130403354055228e-05, + "loss": 0.183, + "step": 3258, + "teacher_loss": 0.17172209918498993 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.15417338907718658, + "learning_rate": 1.4134740494434004e-05, + "loss": 0.2071, + "step": 3259, + "teacher_loss": 0.21302761137485504 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.37323999404907227, + "learning_rate": 1.413907763481278e-05, + "loss": 0.1956, + "step": 3260, + "teacher_loss": 0.1758425235748291 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.33266329765319824, + "learning_rate": 1.4143414775191557e-05, + "loss": 0.2197, + "step": 3261, + "teacher_loss": 0.2071089744567871 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.4028015732765198, + "learning_rate": 1.4147751915570335e-05, + "loss": 0.3019, + "step": 3262, + "teacher_loss": 0.29073506593704224 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.5626373887062073, + "learning_rate": 1.415208905594911e-05, + "loss": 0.2577, + "step": 3263, + "teacher_loss": 0.22385695576667786 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.11185181140899658, + "learning_rate": 1.4156426196327888e-05, + "loss": 0.185, + "step": 3264, + "teacher_loss": 0.1931128352880478 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.7400377988815308, + "learning_rate": 1.4160763336706666e-05, + "loss": 0.2806, + "step": 3265, + "teacher_loss": 0.22954051196575165 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.3646808862686157, + "learning_rate": 1.4165100477085442e-05, + "loss": 0.1755, + "step": 3266, + "teacher_loss": 0.15452909469604492 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.29807019233703613, + "learning_rate": 1.4169437617464218e-05, + "loss": 0.2813, + "step": 3267, + "teacher_loss": 0.27948975563049316 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.4631107449531555, + "learning_rate": 1.4173774757842996e-05, + "loss": 0.292, + "step": 3268, + "teacher_loss": 0.27296876907348633 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.5172280669212341, + "learning_rate": 1.4178111898221773e-05, + "loss": 0.3388, + "step": 3269, + "teacher_loss": 0.3189891576766968 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.2955642342567444, + "learning_rate": 1.418244903860055e-05, + "loss": 0.1977, + "step": 3270, + "teacher_loss": 0.18683956563472748 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.37230587005615234, + "learning_rate": 1.4186786178979327e-05, + "loss": 0.2962, + "step": 3271, + "teacher_loss": 0.28777503967285156 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.5284925103187561, + "learning_rate": 1.4191123319358103e-05, + "loss": 0.3393, + "step": 3272, + "teacher_loss": 0.31830745935440063 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.4318163990974426, + "learning_rate": 1.419546045973688e-05, + "loss": 0.4024, + "step": 3273, + "teacher_loss": 0.3991623520851135 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.323825865983963, + "learning_rate": 1.4199797600115658e-05, + "loss": 0.2059, + "step": 3274, + "teacher_loss": 0.19276204705238342 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.5438075065612793, + "learning_rate": 1.4204134740494435e-05, + "loss": 0.3048, + "step": 3275, + "teacher_loss": 0.2782895565032959 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.3739818334579468, + "learning_rate": 1.4208471880873211e-05, + "loss": 0.3196, + "step": 3276, + "teacher_loss": 0.3135823607444763 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.25560757517814636, + "learning_rate": 1.4212809021251987e-05, + "loss": 0.1945, + "step": 3277, + "teacher_loss": 0.18769824504852295 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.4864589273929596, + "learning_rate": 1.4217146161630765e-05, + "loss": 0.2565, + "step": 3278, + "teacher_loss": 0.23090685904026031 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.6192125678062439, + "learning_rate": 1.4221483302009543e-05, + "loss": 0.3342, + "step": 3279, + "teacher_loss": 0.30247747898101807 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.3190414607524872, + "learning_rate": 1.422582044238832e-05, + "loss": 0.23, + "step": 3280, + "teacher_loss": 0.22011640667915344 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.7085514068603516, + "learning_rate": 1.4230157582767096e-05, + "loss": 0.3197, + "step": 3281, + "teacher_loss": 0.2765168249607086 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.18702015280723572, + "learning_rate": 1.4234494723145872e-05, + "loss": 0.2312, + "step": 3282, + "teacher_loss": 0.2360638678073883 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.1558387726545334, + "learning_rate": 1.423883186352465e-05, + "loss": 0.2178, + "step": 3283, + "teacher_loss": 0.2246885895729065 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.8111177682876587, + "learning_rate": 1.4243169003903427e-05, + "loss": 0.2903, + "step": 3284, + "teacher_loss": 0.23241904377937317 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.5067479610443115, + "learning_rate": 1.4247506144282203e-05, + "loss": 0.2373, + "step": 3285, + "teacher_loss": 0.20737068355083466 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.27260512113571167, + "learning_rate": 1.4251843284660981e-05, + "loss": 0.1753, + "step": 3286, + "teacher_loss": 0.16451531648635864 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.3851807117462158, + "learning_rate": 1.4256180425039757e-05, + "loss": 0.2029, + "step": 3287, + "teacher_loss": 0.18259108066558838 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.32090073823928833, + "learning_rate": 1.4260517565418534e-05, + "loss": 0.2403, + "step": 3288, + "teacher_loss": 0.23139940202236176 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.6241574287414551, + "learning_rate": 1.426485470579731e-05, + "loss": 0.3382, + "step": 3289, + "teacher_loss": 0.30637410283088684 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.28775396943092346, + "learning_rate": 1.4269191846176088e-05, + "loss": 0.2106, + "step": 3290, + "teacher_loss": 0.20206031203269958 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.217560276389122, + "learning_rate": 1.4273528986554866e-05, + "loss": 0.1902, + "step": 3291, + "teacher_loss": 0.1871793419122696 + }, + { + "compression_loss": 0.0, + "epoch": 0.59, + "label_loss": 0.20497804880142212, + "learning_rate": 1.4277866126933643e-05, + "loss": 0.1692, + "step": 3292, + "teacher_loss": 0.16525980830192566 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.39691948890686035, + "learning_rate": 1.4282203267312419e-05, + "loss": 0.2641, + "step": 3293, + "teacher_loss": 0.24930711090564728 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.21685676276683807, + "learning_rate": 1.4286540407691195e-05, + "loss": 0.2561, + "step": 3294, + "teacher_loss": 0.2604849934577942 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.3912780284881592, + "learning_rate": 1.4290877548069973e-05, + "loss": 0.2783, + "step": 3295, + "teacher_loss": 0.26579728722572327 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.22853434085845947, + "learning_rate": 1.429521468844875e-05, + "loss": 0.2228, + "step": 3296, + "teacher_loss": 0.22212429344654083 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.7538334131240845, + "learning_rate": 1.4299551828827528e-05, + "loss": 0.2655, + "step": 3297, + "teacher_loss": 0.21122616529464722 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.4660327434539795, + "learning_rate": 1.4303888969206302e-05, + "loss": 0.2171, + "step": 3298, + "teacher_loss": 0.1894235610961914 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.5630836486816406, + "learning_rate": 1.430822610958508e-05, + "loss": 0.2606, + "step": 3299, + "teacher_loss": 0.22695714235305786 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.2052677571773529, + "learning_rate": 1.4312563249963857e-05, + "loss": 0.1629, + "step": 3300, + "teacher_loss": 0.15818661451339722 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.5099402666091919, + "learning_rate": 1.4316900390342635e-05, + "loss": 0.2393, + "step": 3301, + "teacher_loss": 0.2092798948287964 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.10229835659265518, + "learning_rate": 1.4321237530721413e-05, + "loss": 0.1556, + "step": 3302, + "teacher_loss": 0.16149216890335083 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.47938284277915955, + "learning_rate": 1.4325574671100189e-05, + "loss": 0.2057, + "step": 3303, + "teacher_loss": 0.17533773183822632 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.5525467991828918, + "learning_rate": 1.4329911811478964e-05, + "loss": 0.2504, + "step": 3304, + "teacher_loss": 0.21685676276683807 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.7509241104125977, + "learning_rate": 1.4334248951857742e-05, + "loss": 0.2359, + "step": 3305, + "teacher_loss": 0.1787256896495819 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.37272554636001587, + "learning_rate": 1.433858609223652e-05, + "loss": 0.2192, + "step": 3306, + "teacher_loss": 0.20218569040298462 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.6773841381072998, + "learning_rate": 1.4342923232615296e-05, + "loss": 0.2566, + "step": 3307, + "teacher_loss": 0.2098880410194397 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.8599147200584412, + "learning_rate": 1.4347260372994073e-05, + "loss": 0.3662, + "step": 3308, + "teacher_loss": 0.3113848865032196 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.6239175200462341, + "learning_rate": 1.435159751337285e-05, + "loss": 0.2336, + "step": 3309, + "teacher_loss": 0.1902831494808197 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.41819119453430176, + "learning_rate": 1.4355934653751627e-05, + "loss": 0.3033, + "step": 3310, + "teacher_loss": 0.29053324460983276 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.33756381273269653, + "learning_rate": 1.4360271794130403e-05, + "loss": 0.235, + "step": 3311, + "teacher_loss": 0.22365188598632812 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.4412263333797455, + "learning_rate": 1.436460893450918e-05, + "loss": 0.536, + "step": 3312, + "teacher_loss": 0.546475350856781 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.8449076414108276, + "learning_rate": 1.4368946074887958e-05, + "loss": 0.3005, + "step": 3313, + "teacher_loss": 0.2399749904870987 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.4076206088066101, + "learning_rate": 1.4373283215266736e-05, + "loss": 0.2032, + "step": 3314, + "teacher_loss": 0.18047389388084412 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.2986910939216614, + "learning_rate": 1.4377620355645512e-05, + "loss": 0.2505, + "step": 3315, + "teacher_loss": 0.2451736330986023 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.22177907824516296, + "learning_rate": 1.4381957496024287e-05, + "loss": 0.189, + "step": 3316, + "teacher_loss": 0.18540169298648834 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.7372938394546509, + "learning_rate": 1.4386294636403065e-05, + "loss": 0.347, + "step": 3317, + "teacher_loss": 0.30368396639823914 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.21904560923576355, + "learning_rate": 1.4390631776781843e-05, + "loss": 0.2347, + "step": 3318, + "teacher_loss": 0.2364935725927353 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.27498117089271545, + "learning_rate": 1.439496891716062e-05, + "loss": 0.1977, + "step": 3319, + "teacher_loss": 0.18913918733596802 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.35954350233078003, + "learning_rate": 1.4399306057539395e-05, + "loss": 0.2243, + "step": 3320, + "teacher_loss": 0.20928901433944702 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.3509919047355652, + "learning_rate": 1.4403643197918172e-05, + "loss": 0.283, + "step": 3321, + "teacher_loss": 0.27545228600502014 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.2068735957145691, + "learning_rate": 1.440798033829695e-05, + "loss": 0.175, + "step": 3322, + "teacher_loss": 0.17141315340995789 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.3089473843574524, + "learning_rate": 1.4412317478675727e-05, + "loss": 0.2518, + "step": 3323, + "teacher_loss": 0.24548670649528503 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.4513481557369232, + "learning_rate": 1.4416654619054505e-05, + "loss": 0.2358, + "step": 3324, + "teacher_loss": 0.21183869242668152 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.6127954721450806, + "learning_rate": 1.4420991759433281e-05, + "loss": 0.2664, + "step": 3325, + "teacher_loss": 0.22791558504104614 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.6712700724601746, + "learning_rate": 1.4425328899812057e-05, + "loss": 0.2892, + "step": 3326, + "teacher_loss": 0.24672988057136536 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.36389631032943726, + "learning_rate": 1.4429666040190835e-05, + "loss": 0.2207, + "step": 3327, + "teacher_loss": 0.20480097830295563 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.2541850805282593, + "learning_rate": 1.4434003180569612e-05, + "loss": 0.2058, + "step": 3328, + "teacher_loss": 0.20037469267845154 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.38025254011154175, + "learning_rate": 1.4438340320948388e-05, + "loss": 0.2198, + "step": 3329, + "teacher_loss": 0.2020048201084137 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.46609577536582947, + "learning_rate": 1.4442677461327166e-05, + "loss": 0.2782, + "step": 3330, + "teacher_loss": 0.25734543800354004 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.22417768836021423, + "learning_rate": 1.4447014601705942e-05, + "loss": 0.1802, + "step": 3331, + "teacher_loss": 0.17527559399604797 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.22912713885307312, + "learning_rate": 1.445135174208472e-05, + "loss": 0.2432, + "step": 3332, + "teacher_loss": 0.24478529393672943 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.6016044616699219, + "learning_rate": 1.4455688882463495e-05, + "loss": 0.2983, + "step": 3333, + "teacher_loss": 0.26455914974212646 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.21600395441055298, + "learning_rate": 1.4460026022842273e-05, + "loss": 0.2025, + "step": 3334, + "teacher_loss": 0.2010117471218109 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.4427329897880554, + "learning_rate": 1.446436316322105e-05, + "loss": 0.2734, + "step": 3335, + "teacher_loss": 0.25455036759376526 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.38411688804626465, + "learning_rate": 1.4468700303599826e-05, + "loss": 0.3439, + "step": 3336, + "teacher_loss": 0.33941006660461426 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.8976858258247375, + "learning_rate": 1.4473037443978604e-05, + "loss": 0.3332, + "step": 3337, + "teacher_loss": 0.27052175998687744 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.35446709394454956, + "learning_rate": 1.447737458435738e-05, + "loss": 0.1938, + "step": 3338, + "teacher_loss": 0.17591030895709991 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.8310773372650146, + "learning_rate": 1.4481711724736158e-05, + "loss": 0.3417, + "step": 3339, + "teacher_loss": 0.2873011827468872 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.6898626089096069, + "learning_rate": 1.4486048865114935e-05, + "loss": 0.2246, + "step": 3340, + "teacher_loss": 0.17290669679641724 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.5938445329666138, + "learning_rate": 1.4490386005493713e-05, + "loss": 0.3124, + "step": 3341, + "teacher_loss": 0.28109943866729736 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.3888205885887146, + "learning_rate": 1.4494723145872487e-05, + "loss": 0.2463, + "step": 3342, + "teacher_loss": 0.23047898709774017 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.20008406043052673, + "learning_rate": 1.4499060286251265e-05, + "loss": 0.1841, + "step": 3343, + "teacher_loss": 0.18233180046081543 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.3519536852836609, + "learning_rate": 1.4503397426630042e-05, + "loss": 0.2286, + "step": 3344, + "teacher_loss": 0.2149081826210022 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.6598502397537231, + "learning_rate": 1.450773456700882e-05, + "loss": 0.2824, + "step": 3345, + "teacher_loss": 0.24050915241241455 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.33505779504776, + "learning_rate": 1.4512071707387598e-05, + "loss": 0.2648, + "step": 3346, + "teacher_loss": 0.25701916217803955 + }, + { + "compression_loss": 0.0, + "epoch": 0.6, + "label_loss": 0.5194143652915955, + "learning_rate": 1.4516408847766372e-05, + "loss": 0.2231, + "step": 3347, + "teacher_loss": 0.19022509455680847 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.42489635944366455, + "learning_rate": 1.452074598814515e-05, + "loss": 0.2206, + "step": 3348, + "teacher_loss": 0.1978747844696045 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.4172751009464264, + "learning_rate": 1.4525083128523927e-05, + "loss": 0.188, + "step": 3349, + "teacher_loss": 0.16256208717823029 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.49236616492271423, + "learning_rate": 1.4529420268902705e-05, + "loss": 0.2505, + "step": 3350, + "teacher_loss": 0.22359010577201843 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.5252891778945923, + "learning_rate": 1.453375740928148e-05, + "loss": 0.2546, + "step": 3351, + "teacher_loss": 0.2245016098022461 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.31457340717315674, + "learning_rate": 1.4538094549660258e-05, + "loss": 0.2998, + "step": 3352, + "teacher_loss": 0.2981276214122772 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.25483405590057373, + "learning_rate": 1.4542431690039034e-05, + "loss": 0.2666, + "step": 3353, + "teacher_loss": 0.2678525745868683 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.1261131912469864, + "learning_rate": 1.4546768830417812e-05, + "loss": 0.2029, + "step": 3354, + "teacher_loss": 0.21145838499069214 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.6247426271438599, + "learning_rate": 1.4551105970796588e-05, + "loss": 0.2099, + "step": 3355, + "teacher_loss": 0.16385877132415771 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.31666791439056396, + "learning_rate": 1.4555443111175365e-05, + "loss": 0.2929, + "step": 3356, + "teacher_loss": 0.2903040945529938 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.4036228060722351, + "learning_rate": 1.4559780251554143e-05, + "loss": 0.2718, + "step": 3357, + "teacher_loss": 0.2571929693222046 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.40825363993644714, + "learning_rate": 1.4564117391932919e-05, + "loss": 0.2489, + "step": 3358, + "teacher_loss": 0.2312489151954651 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.33915138244628906, + "learning_rate": 1.4568454532311696e-05, + "loss": 0.2147, + "step": 3359, + "teacher_loss": 0.20091068744659424 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.3459758758544922, + "learning_rate": 1.4572791672690472e-05, + "loss": 0.1796, + "step": 3360, + "teacher_loss": 0.16113825142383575 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.4025750160217285, + "learning_rate": 1.457712881306925e-05, + "loss": 0.1916, + "step": 3361, + "teacher_loss": 0.16817405819892883 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.4482371509075165, + "learning_rate": 1.4581465953448028e-05, + "loss": 0.3028, + "step": 3362, + "teacher_loss": 0.2865876853466034 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.423972487449646, + "learning_rate": 1.4585803093826805e-05, + "loss": 0.294, + "step": 3363, + "teacher_loss": 0.2795642912387848 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.30741673707962036, + "learning_rate": 1.459014023420558e-05, + "loss": 0.2836, + "step": 3364, + "teacher_loss": 0.28096073865890503 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.42777758836746216, + "learning_rate": 1.4594477374584357e-05, + "loss": 0.2139, + "step": 3365, + "teacher_loss": 0.19011515378952026 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.1712648570537567, + "learning_rate": 1.4598814514963135e-05, + "loss": 0.2169, + "step": 3366, + "teacher_loss": 0.22198635339736938 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.30421602725982666, + "learning_rate": 1.4603151655341912e-05, + "loss": 0.2344, + "step": 3367, + "teacher_loss": 0.2266312688589096 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.40833213925361633, + "learning_rate": 1.460748879572069e-05, + "loss": 0.2707, + "step": 3368, + "teacher_loss": 0.2553955614566803 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.4344463348388672, + "learning_rate": 1.4611825936099464e-05, + "loss": 0.3083, + "step": 3369, + "teacher_loss": 0.29432785511016846 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.46722233295440674, + "learning_rate": 1.4616163076478242e-05, + "loss": 0.2445, + "step": 3370, + "teacher_loss": 0.2197730988264084 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.31049641966819763, + "learning_rate": 1.462050021685702e-05, + "loss": 0.1851, + "step": 3371, + "teacher_loss": 0.17112025618553162 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.4445427656173706, + "learning_rate": 1.4624837357235797e-05, + "loss": 0.3429, + "step": 3372, + "teacher_loss": 0.33159640431404114 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.3296544551849365, + "learning_rate": 1.4629174497614573e-05, + "loss": 0.2311, + "step": 3373, + "teacher_loss": 0.22016921639442444 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.7067635655403137, + "learning_rate": 1.463351163799335e-05, + "loss": 0.3513, + "step": 3374, + "teacher_loss": 0.31182870268821716 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.2632507085800171, + "learning_rate": 1.4637848778372127e-05, + "loss": 0.1555, + "step": 3375, + "teacher_loss": 0.14351242780685425 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.49469077587127686, + "learning_rate": 1.4642185918750904e-05, + "loss": 0.2288, + "step": 3376, + "teacher_loss": 0.19925202429294586 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.2655493915081024, + "learning_rate": 1.464652305912968e-05, + "loss": 0.2478, + "step": 3377, + "teacher_loss": 0.24585343897342682 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.12248153239488602, + "learning_rate": 1.4650860199508458e-05, + "loss": 0.1809, + "step": 3378, + "teacher_loss": 0.1874275654554367 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.262132465839386, + "learning_rate": 1.4655197339887235e-05, + "loss": 0.2307, + "step": 3379, + "teacher_loss": 0.22716563940048218 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.2127070426940918, + "learning_rate": 1.4659534480266011e-05, + "loss": 0.1859, + "step": 3380, + "teacher_loss": 0.18286865949630737 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.5640524625778198, + "learning_rate": 1.4663871620644789e-05, + "loss": 0.2994, + "step": 3381, + "teacher_loss": 0.27000921964645386 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.31749674677848816, + "learning_rate": 1.4668208761023565e-05, + "loss": 0.2185, + "step": 3382, + "teacher_loss": 0.2074557989835739 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.2831347584724426, + "learning_rate": 1.4672545901402342e-05, + "loss": 0.2387, + "step": 3383, + "teacher_loss": 0.23373189568519592 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.41861122846603394, + "learning_rate": 1.467688304178112e-05, + "loss": 0.2573, + "step": 3384, + "teacher_loss": 0.2393331527709961 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.40169963240623474, + "learning_rate": 1.4681220182159896e-05, + "loss": 0.1617, + "step": 3385, + "teacher_loss": 0.1349981129169464 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.5008865594863892, + "learning_rate": 1.4685557322538672e-05, + "loss": 0.2453, + "step": 3386, + "teacher_loss": 0.21694722771644592 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.23056113719940186, + "learning_rate": 1.468989446291745e-05, + "loss": 0.2116, + "step": 3387, + "teacher_loss": 0.20954419672489166 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.5144234895706177, + "learning_rate": 1.4694231603296227e-05, + "loss": 0.2964, + "step": 3388, + "teacher_loss": 0.27213501930236816 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.563983678817749, + "learning_rate": 1.4698568743675005e-05, + "loss": 0.2342, + "step": 3389, + "teacher_loss": 0.19753378629684448 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.17529204487800598, + "learning_rate": 1.4702905884053782e-05, + "loss": 0.2057, + "step": 3390, + "teacher_loss": 0.2090734988451004 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.9061448574066162, + "learning_rate": 1.4707243024432557e-05, + "loss": 0.4089, + "step": 3391, + "teacher_loss": 0.3536835312843323 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.5815277099609375, + "learning_rate": 1.4711580164811334e-05, + "loss": 0.2109, + "step": 3392, + "teacher_loss": 0.16971027851104736 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.4202399253845215, + "learning_rate": 1.4715917305190112e-05, + "loss": 0.2154, + "step": 3393, + "teacher_loss": 0.1926787793636322 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.2878839373588562, + "learning_rate": 1.472025444556889e-05, + "loss": 0.2824, + "step": 3394, + "teacher_loss": 0.28177952766418457 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.31564658880233765, + "learning_rate": 1.4724591585947665e-05, + "loss": 0.1564, + "step": 3395, + "teacher_loss": 0.13865971565246582 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.24211916327476501, + "learning_rate": 1.4728928726326441e-05, + "loss": 0.1774, + "step": 3396, + "teacher_loss": 0.17018786072731018 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.09590311348438263, + "learning_rate": 1.4733265866705219e-05, + "loss": 0.2186, + "step": 3397, + "teacher_loss": 0.23228822648525238 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.22575128078460693, + "learning_rate": 1.4737603007083997e-05, + "loss": 0.177, + "step": 3398, + "teacher_loss": 0.17160624265670776 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.7020028829574585, + "learning_rate": 1.4741940147462774e-05, + "loss": 0.2932, + "step": 3399, + "teacher_loss": 0.24777421355247498 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.20376479625701904, + "learning_rate": 1.474627728784155e-05, + "loss": 0.2622, + "step": 3400, + "teacher_loss": 0.2686937749385834 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.4309338331222534, + "learning_rate": 1.4750614428220328e-05, + "loss": 0.2503, + "step": 3401, + "teacher_loss": 0.2301778793334961 + }, + { + "compression_loss": 0.0, + "epoch": 0.61, + "label_loss": 0.5975043177604675, + "learning_rate": 1.4754951568599104e-05, + "loss": 0.3061, + "step": 3402, + "teacher_loss": 0.27368927001953125 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.3087449073791504, + "learning_rate": 1.4759288708977881e-05, + "loss": 0.1939, + "step": 3403, + "teacher_loss": 0.18109653890132904 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.749091386795044, + "learning_rate": 1.4763625849356657e-05, + "loss": 0.462, + "step": 3404, + "teacher_loss": 0.4301269054412842 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.3045800030231476, + "learning_rate": 1.4767962989735435e-05, + "loss": 0.3074, + "step": 3405, + "teacher_loss": 0.3077341616153717 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.27656614780426025, + "learning_rate": 1.4772300130114212e-05, + "loss": 0.2216, + "step": 3406, + "teacher_loss": 0.2154550701379776 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.34204572439193726, + "learning_rate": 1.4776637270492988e-05, + "loss": 0.233, + "step": 3407, + "teacher_loss": 0.2208937406539917 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.6076553463935852, + "learning_rate": 1.4780974410871764e-05, + "loss": 0.2613, + "step": 3408, + "teacher_loss": 0.22278070449829102 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.3603961169719696, + "learning_rate": 1.4785311551250542e-05, + "loss": 0.4114, + "step": 3409, + "teacher_loss": 0.41710513830184937 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.41302379965782166, + "learning_rate": 1.478964869162932e-05, + "loss": 0.3728, + "step": 3410, + "teacher_loss": 0.3682812452316284 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.24380508065223694, + "learning_rate": 1.4793985832008097e-05, + "loss": 0.2926, + "step": 3411, + "teacher_loss": 0.29801759123802185 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.28050437569618225, + "learning_rate": 1.4798322972386875e-05, + "loss": 0.1697, + "step": 3412, + "teacher_loss": 0.1573762148618698 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.277423232793808, + "learning_rate": 1.4802660112765649e-05, + "loss": 0.2684, + "step": 3413, + "teacher_loss": 0.2673839330673218 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 1.0526862144470215, + "learning_rate": 1.4806997253144427e-05, + "loss": 0.2522, + "step": 3414, + "teacher_loss": 0.1632212996482849 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.3796747624874115, + "learning_rate": 1.4811334393523204e-05, + "loss": 0.2379, + "step": 3415, + "teacher_loss": 0.2221188098192215 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.34162837266921997, + "learning_rate": 1.4815671533901982e-05, + "loss": 0.3372, + "step": 3416, + "teacher_loss": 0.33666083216667175 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.29953110218048096, + "learning_rate": 1.4820008674280758e-05, + "loss": 0.1704, + "step": 3417, + "teacher_loss": 0.15602856874465942 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.7754138708114624, + "learning_rate": 1.4824345814659534e-05, + "loss": 0.2568, + "step": 3418, + "teacher_loss": 0.1991792619228363 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.2654818296432495, + "learning_rate": 1.4828682955038311e-05, + "loss": 0.2571, + "step": 3419, + "teacher_loss": 0.25615718960762024 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.18821729719638824, + "learning_rate": 1.4833020095417089e-05, + "loss": 0.1265, + "step": 3420, + "teacher_loss": 0.11963748186826706 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.2233489602804184, + "learning_rate": 1.4837357235795867e-05, + "loss": 0.1507, + "step": 3421, + "teacher_loss": 0.14262288808822632 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.4572219252586365, + "learning_rate": 1.4841694376174643e-05, + "loss": 0.2487, + "step": 3422, + "teacher_loss": 0.22548678517341614 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.4110633134841919, + "learning_rate": 1.484603151655342e-05, + "loss": 0.2503, + "step": 3423, + "teacher_loss": 0.23239555954933167 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.36263561248779297, + "learning_rate": 1.4850368656932196e-05, + "loss": 0.2048, + "step": 3424, + "teacher_loss": 0.18724486231803894 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.6267473697662354, + "learning_rate": 1.4854705797310974e-05, + "loss": 0.3407, + "step": 3425, + "teacher_loss": 0.30892235040664673 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.4371982216835022, + "learning_rate": 1.485904293768975e-05, + "loss": 0.3395, + "step": 3426, + "teacher_loss": 0.32864153385162354 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.5955886840820312, + "learning_rate": 1.4863380078068527e-05, + "loss": 0.31, + "step": 3427, + "teacher_loss": 0.2782576382160187 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.4124058485031128, + "learning_rate": 1.4867717218447305e-05, + "loss": 0.2636, + "step": 3428, + "teacher_loss": 0.24701912701129913 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.37787556648254395, + "learning_rate": 1.4872054358826081e-05, + "loss": 0.2658, + "step": 3429, + "teacher_loss": 0.2533859312534332 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.3903588056564331, + "learning_rate": 1.4876391499204857e-05, + "loss": 0.2042, + "step": 3430, + "teacher_loss": 0.18352849781513214 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.33652323484420776, + "learning_rate": 1.4880728639583634e-05, + "loss": 0.1875, + "step": 3431, + "teacher_loss": 0.17089374363422394 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.3109883666038513, + "learning_rate": 1.4885065779962412e-05, + "loss": 0.1796, + "step": 3432, + "teacher_loss": 0.1650347113609314 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.37500643730163574, + "learning_rate": 1.488940292034119e-05, + "loss": 0.2515, + "step": 3433, + "teacher_loss": 0.2377958595752716 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.2629029154777527, + "learning_rate": 1.4893740060719966e-05, + "loss": 0.3387, + "step": 3434, + "teacher_loss": 0.3470941483974457 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.7807692289352417, + "learning_rate": 1.4898077201098742e-05, + "loss": 0.3638, + "step": 3435, + "teacher_loss": 0.31749826669692993 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.4221172332763672, + "learning_rate": 1.4902414341477519e-05, + "loss": 0.2316, + "step": 3436, + "teacher_loss": 0.21043507754802704 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.5393366813659668, + "learning_rate": 1.4906751481856297e-05, + "loss": 0.3467, + "step": 3437, + "teacher_loss": 0.32529664039611816 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.18557564914226532, + "learning_rate": 1.4911088622235074e-05, + "loss": 0.2341, + "step": 3438, + "teacher_loss": 0.23945313692092896 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.5244229435920715, + "learning_rate": 1.491542576261385e-05, + "loss": 0.272, + "step": 3439, + "teacher_loss": 0.24395358562469482 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.3325469493865967, + "learning_rate": 1.4919762902992626e-05, + "loss": 0.2178, + "step": 3440, + "teacher_loss": 0.20505845546722412 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.5698983073234558, + "learning_rate": 1.4924100043371404e-05, + "loss": 0.1931, + "step": 3441, + "teacher_loss": 0.15125951170921326 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.24199610948562622, + "learning_rate": 1.4928437183750181e-05, + "loss": 0.2098, + "step": 3442, + "teacher_loss": 0.20624101161956787 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.4070262312889099, + "learning_rate": 1.4932774324128959e-05, + "loss": 0.2283, + "step": 3443, + "teacher_loss": 0.208482027053833 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.4251914918422699, + "learning_rate": 1.4937111464507735e-05, + "loss": 0.35, + "step": 3444, + "teacher_loss": 0.34165945649147034 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.20547810196876526, + "learning_rate": 1.4941448604886511e-05, + "loss": 0.1587, + "step": 3445, + "teacher_loss": 0.15346668660640717 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.37087687849998474, + "learning_rate": 1.4945785745265289e-05, + "loss": 0.2152, + "step": 3446, + "teacher_loss": 0.19793125987052917 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.479753315448761, + "learning_rate": 1.4950122885644066e-05, + "loss": 0.1859, + "step": 3447, + "teacher_loss": 0.15327255427837372 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.2640385031700134, + "learning_rate": 1.4954460026022842e-05, + "loss": 0.1954, + "step": 3448, + "teacher_loss": 0.1877971738576889 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.3383401334285736, + "learning_rate": 1.495879716640162e-05, + "loss": 0.2292, + "step": 3449, + "teacher_loss": 0.21702560782432556 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.5732656121253967, + "learning_rate": 1.4963134306780397e-05, + "loss": 0.2456, + "step": 3450, + "teacher_loss": 0.20914244651794434 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.25484299659729004, + "learning_rate": 1.4967471447159173e-05, + "loss": 0.2286, + "step": 3451, + "teacher_loss": 0.22570136189460754 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.2426004260778427, + "learning_rate": 1.497180858753795e-05, + "loss": 0.1862, + "step": 3452, + "teacher_loss": 0.17995625734329224 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.29108572006225586, + "learning_rate": 1.4976145727916727e-05, + "loss": 0.197, + "step": 3453, + "teacher_loss": 0.18654736876487732 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.14845633506774902, + "learning_rate": 1.4980482868295504e-05, + "loss": 0.1964, + "step": 3454, + "teacher_loss": 0.20168940722942352 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.2927010953426361, + "learning_rate": 1.4984820008674282e-05, + "loss": 0.2123, + "step": 3455, + "teacher_loss": 0.20332428812980652 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.3085951805114746, + "learning_rate": 1.4989157149053058e-05, + "loss": 0.2331, + "step": 3456, + "teacher_loss": 0.22469574213027954 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.2436988353729248, + "learning_rate": 1.4993494289431834e-05, + "loss": 0.2186, + "step": 3457, + "teacher_loss": 0.2158626914024353 + }, + { + "compression_loss": 0.0, + "epoch": 0.62, + "label_loss": 0.2911866307258606, + "learning_rate": 1.4997831429810612e-05, + "loss": 0.2163, + "step": 3458, + "teacher_loss": 0.2079509049654007 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.623927116394043, + "learning_rate": 1.5002168570189387e-05, + "loss": 0.2495, + "step": 3459, + "teacher_loss": 0.20790499448776245 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.44187474250793457, + "learning_rate": 1.5006505710568165e-05, + "loss": 0.2109, + "step": 3460, + "teacher_loss": 0.1851940155029297 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.3549068868160248, + "learning_rate": 1.5010842850946943e-05, + "loss": 0.2108, + "step": 3461, + "teacher_loss": 0.19476114213466644 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.3677303194999695, + "learning_rate": 1.5015179991325719e-05, + "loss": 0.2465, + "step": 3462, + "teacher_loss": 0.23304778337478638 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.6731287240982056, + "learning_rate": 1.5019517131704496e-05, + "loss": 0.2178, + "step": 3463, + "teacher_loss": 0.1672612726688385 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.3033241033554077, + "learning_rate": 1.5023854272083274e-05, + "loss": 0.3211, + "step": 3464, + "teacher_loss": 0.3230874538421631 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.47479310631752014, + "learning_rate": 1.5028191412462052e-05, + "loss": 0.2223, + "step": 3465, + "teacher_loss": 0.19427253305912018 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.3262695074081421, + "learning_rate": 1.503252855284083e-05, + "loss": 0.2066, + "step": 3466, + "teacher_loss": 0.19325439631938934 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.42400845885276794, + "learning_rate": 1.5036865693219605e-05, + "loss": 0.2337, + "step": 3467, + "teacher_loss": 0.21253237128257751 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.44303131103515625, + "learning_rate": 1.504120283359838e-05, + "loss": 0.2158, + "step": 3468, + "teacher_loss": 0.19058318436145782 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.6803537607192993, + "learning_rate": 1.5045539973977157e-05, + "loss": 0.3079, + "step": 3469, + "teacher_loss": 0.2665376663208008 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.4041784107685089, + "learning_rate": 1.5049877114355935e-05, + "loss": 0.259, + "step": 3470, + "teacher_loss": 0.2428523600101471 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.5852808952331543, + "learning_rate": 1.5054214254734712e-05, + "loss": 0.1976, + "step": 3471, + "teacher_loss": 0.15454696118831635 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.4442068934440613, + "learning_rate": 1.505855139511349e-05, + "loss": 0.246, + "step": 3472, + "teacher_loss": 0.223946675658226 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.31871461868286133, + "learning_rate": 1.5062888535492266e-05, + "loss": 0.2145, + "step": 3473, + "teacher_loss": 0.20292532444000244 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.27461183071136475, + "learning_rate": 1.5067225675871043e-05, + "loss": 0.1957, + "step": 3474, + "teacher_loss": 0.18689903616905212 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.3858081102371216, + "learning_rate": 1.5071562816249821e-05, + "loss": 0.2397, + "step": 3475, + "teacher_loss": 0.2234216332435608 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.301738977432251, + "learning_rate": 1.5075899956628599e-05, + "loss": 0.4482, + "step": 3476, + "teacher_loss": 0.46448424458503723 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.3485831618309021, + "learning_rate": 1.5080237097007373e-05, + "loss": 0.3492, + "step": 3477, + "teacher_loss": 0.3492443859577179 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.42254817485809326, + "learning_rate": 1.5084574237386149e-05, + "loss": 0.2039, + "step": 3478, + "teacher_loss": 0.1795610785484314 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.3052677810192108, + "learning_rate": 1.5088911377764926e-05, + "loss": 0.2172, + "step": 3479, + "teacher_loss": 0.20743393898010254 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.5425153970718384, + "learning_rate": 1.5093248518143704e-05, + "loss": 0.2933, + "step": 3480, + "teacher_loss": 0.2655656933784485 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.8153358697891235, + "learning_rate": 1.5097585658522482e-05, + "loss": 0.8623, + "step": 3481, + "teacher_loss": 0.8675612807273865 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.3451419174671173, + "learning_rate": 1.510192279890126e-05, + "loss": 0.2072, + "step": 3482, + "teacher_loss": 0.1919066458940506 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.4334903955459595, + "learning_rate": 1.5106259939280035e-05, + "loss": 0.3073, + "step": 3483, + "teacher_loss": 0.29331082105636597 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.5016162395477295, + "learning_rate": 1.5110597079658813e-05, + "loss": 0.2573, + "step": 3484, + "teacher_loss": 0.23015466332435608 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.3000132739543915, + "learning_rate": 1.5114934220037587e-05, + "loss": 0.2689, + "step": 3485, + "teacher_loss": 0.2654500901699066 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.3497113585472107, + "learning_rate": 1.5119271360416365e-05, + "loss": 0.2219, + "step": 3486, + "teacher_loss": 0.2076825350522995 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.5079174041748047, + "learning_rate": 1.5123608500795142e-05, + "loss": 0.1968, + "step": 3487, + "teacher_loss": 0.1622769832611084 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.26312461495399475, + "learning_rate": 1.512794564117392e-05, + "loss": 0.2623, + "step": 3488, + "teacher_loss": 0.2622022032737732 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.32187777757644653, + "learning_rate": 1.5132282781552696e-05, + "loss": 0.3099, + "step": 3489, + "teacher_loss": 0.3085310161113739 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.3103920817375183, + "learning_rate": 1.5136619921931473e-05, + "loss": 0.3342, + "step": 3490, + "teacher_loss": 0.33689871430397034 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.3706433176994324, + "learning_rate": 1.5140957062310251e-05, + "loss": 0.2537, + "step": 3491, + "teacher_loss": 0.24070224165916443 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.2998778223991394, + "learning_rate": 1.5145294202689029e-05, + "loss": 0.328, + "step": 3492, + "teacher_loss": 0.33110368251800537 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.3468497693538666, + "learning_rate": 1.5149631343067806e-05, + "loss": 0.2343, + "step": 3493, + "teacher_loss": 0.2218460738658905 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.6053321361541748, + "learning_rate": 1.5153968483446579e-05, + "loss": 0.3311, + "step": 3494, + "teacher_loss": 0.3006088137626648 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.8557692170143127, + "learning_rate": 1.5158305623825356e-05, + "loss": 0.282, + "step": 3495, + "teacher_loss": 0.2182486355304718 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.5796129107475281, + "learning_rate": 1.5162642764204134e-05, + "loss": 0.3189, + "step": 3496, + "teacher_loss": 0.2899051904678345 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.3411816954612732, + "learning_rate": 1.5166979904582912e-05, + "loss": 0.2684, + "step": 3497, + "teacher_loss": 0.2603408694267273 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.4847777485847473, + "learning_rate": 1.517131704496169e-05, + "loss": 0.2241, + "step": 3498, + "teacher_loss": 0.19509901106357574 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.4022645354270935, + "learning_rate": 1.5175654185340467e-05, + "loss": 0.2164, + "step": 3499, + "teacher_loss": 0.19578978419303894 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.15240633487701416, + "learning_rate": 1.5179991325719243e-05, + "loss": 0.1809, + "step": 3500, + "teacher_loss": 0.18403062224388123 + }, + { + "epoch": 0.63, + "eval_exact_match": 79.72563859981078, + "eval_f1": 87.12878671331147, + "step": 3500 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.21813340485095978, + "learning_rate": 1.518432846609802e-05, + "loss": 0.1765, + "step": 3501, + "teacher_loss": 0.1718859225511551 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.4939407706260681, + "learning_rate": 1.5188665606476798e-05, + "loss": 0.2498, + "step": 3502, + "teacher_loss": 0.22269971668720245 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.2664722204208374, + "learning_rate": 1.5193002746855572e-05, + "loss": 0.1908, + "step": 3503, + "teacher_loss": 0.1824444830417633 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.707490086555481, + "learning_rate": 1.519733988723435e-05, + "loss": 0.3369, + "step": 3504, + "teacher_loss": 0.29577672481536865 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.43466269969940186, + "learning_rate": 1.5201677027613126e-05, + "loss": 0.2525, + "step": 3505, + "teacher_loss": 0.23222452402114868 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.30984991788864136, + "learning_rate": 1.5206014167991904e-05, + "loss": 0.1998, + "step": 3506, + "teacher_loss": 0.18760037422180176 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.5077179670333862, + "learning_rate": 1.5210351308370681e-05, + "loss": 0.4354, + "step": 3507, + "teacher_loss": 0.42732805013656616 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.8567879796028137, + "learning_rate": 1.5214688448749459e-05, + "loss": 0.7833, + "step": 3508, + "teacher_loss": 0.7751364707946777 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.647532045841217, + "learning_rate": 1.5219025589128236e-05, + "loss": 0.3165, + "step": 3509, + "teacher_loss": 0.2797354459762573 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.8472211360931396, + "learning_rate": 1.5223362729507014e-05, + "loss": 0.3033, + "step": 3510, + "teacher_loss": 0.2428738921880722 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.1734766960144043, + "learning_rate": 1.522769986988579e-05, + "loss": 0.1694, + "step": 3511, + "teacher_loss": 0.16892538964748383 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.2166706919670105, + "learning_rate": 1.5232037010264564e-05, + "loss": 0.1902, + "step": 3512, + "teacher_loss": 0.18724998831748962 + }, + { + "compression_loss": 0.0, + "epoch": 0.63, + "label_loss": 0.259573370218277, + "learning_rate": 1.5236374150643342e-05, + "loss": 0.1992, + "step": 3513, + "teacher_loss": 0.19252794981002808 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.740859866142273, + "learning_rate": 1.524071129102212e-05, + "loss": 0.3266, + "step": 3514, + "teacher_loss": 0.2805306315422058 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.42880597710609436, + "learning_rate": 1.5245048431400897e-05, + "loss": 0.2021, + "step": 3515, + "teacher_loss": 0.17694349586963654 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.6684502363204956, + "learning_rate": 1.5249385571779673e-05, + "loss": 0.3493, + "step": 3516, + "teacher_loss": 0.3138298988342285 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.22788703441619873, + "learning_rate": 1.525372271215845e-05, + "loss": 0.2297, + "step": 3517, + "teacher_loss": 0.2299208641052246 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.256854772567749, + "learning_rate": 1.5258059852537228e-05, + "loss": 0.1976, + "step": 3518, + "teacher_loss": 0.19104236364364624 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.3419114351272583, + "learning_rate": 1.5262396992916006e-05, + "loss": 0.2456, + "step": 3519, + "teacher_loss": 0.2348945289850235 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.8600484132766724, + "learning_rate": 1.5266734133294783e-05, + "loss": 0.346, + "step": 3520, + "teacher_loss": 0.28883543610572815 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.15551477670669556, + "learning_rate": 1.5271071273673558e-05, + "loss": 0.2121, + "step": 3521, + "teacher_loss": 0.21837365627288818 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.4352140724658966, + "learning_rate": 1.5275408414052335e-05, + "loss": 0.2487, + "step": 3522, + "teacher_loss": 0.22798162698745728 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.3538445234298706, + "learning_rate": 1.5279745554431113e-05, + "loss": 0.2407, + "step": 3523, + "teacher_loss": 0.22812360525131226 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.24155201017856598, + "learning_rate": 1.5284082694809887e-05, + "loss": 0.2013, + "step": 3524, + "teacher_loss": 0.19684141874313354 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.17609374225139618, + "learning_rate": 1.5288419835188665e-05, + "loss": 0.1865, + "step": 3525, + "teacher_loss": 0.18765880167484283 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.45157262682914734, + "learning_rate": 1.5292756975567442e-05, + "loss": 0.3323, + "step": 3526, + "teacher_loss": 0.3190820813179016 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.45473435521125793, + "learning_rate": 1.529709411594622e-05, + "loss": 0.2471, + "step": 3527, + "teacher_loss": 0.22407421469688416 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.44554850459098816, + "learning_rate": 1.5301431256324998e-05, + "loss": 0.2381, + "step": 3528, + "teacher_loss": 0.2150544375181198 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.6162223219871521, + "learning_rate": 1.5305768396703772e-05, + "loss": 0.2158, + "step": 3529, + "teacher_loss": 0.17126025259494781 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.3820152282714844, + "learning_rate": 1.531010553708255e-05, + "loss": 0.228, + "step": 3530, + "teacher_loss": 0.21084049344062805 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.16359582543373108, + "learning_rate": 1.5314442677461327e-05, + "loss": 0.122, + "step": 3531, + "teacher_loss": 0.1173916757106781 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.4035291075706482, + "learning_rate": 1.5318779817840105e-05, + "loss": 0.3627, + "step": 3532, + "teacher_loss": 0.35814833641052246 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.5482479929924011, + "learning_rate": 1.5323116958218882e-05, + "loss": 0.2288, + "step": 3533, + "teacher_loss": 0.1932622641324997 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.1804192066192627, + "learning_rate": 1.532745409859766e-05, + "loss": 0.1708, + "step": 3534, + "teacher_loss": 0.1697012484073639 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.848974347114563, + "learning_rate": 1.5331791238976434e-05, + "loss": 0.3246, + "step": 3535, + "teacher_loss": 0.26633256673812866 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.6149992942810059, + "learning_rate": 1.5336128379355212e-05, + "loss": 0.2398, + "step": 3536, + "teacher_loss": 0.19810134172439575 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.29140013456344604, + "learning_rate": 1.534046551973399e-05, + "loss": 0.1723, + "step": 3537, + "teacher_loss": 0.15904486179351807 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 1.1961380243301392, + "learning_rate": 1.5344802660112764e-05, + "loss": 0.3743, + "step": 3538, + "teacher_loss": 0.28298419713974 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.5583049058914185, + "learning_rate": 1.534913980049154e-05, + "loss": 0.2936, + "step": 3539, + "teacher_loss": 0.26417407393455505 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.35531988739967346, + "learning_rate": 1.535347694087032e-05, + "loss": 0.2256, + "step": 3540, + "teacher_loss": 0.21116071939468384 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.29444342851638794, + "learning_rate": 1.5357814081249097e-05, + "loss": 0.238, + "step": 3541, + "teacher_loss": 0.23168343305587769 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.45763248205184937, + "learning_rate": 1.5362151221627874e-05, + "loss": 0.1905, + "step": 3542, + "teacher_loss": 0.16076692938804626 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.4373299479484558, + "learning_rate": 1.5366488362006652e-05, + "loss": 0.2672, + "step": 3543, + "teacher_loss": 0.24832850694656372 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.36778074502944946, + "learning_rate": 1.537082550238543e-05, + "loss": 0.2311, + "step": 3544, + "teacher_loss": 0.21589943766593933 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.3620985150337219, + "learning_rate": 1.5375162642764207e-05, + "loss": 0.2549, + "step": 3545, + "teacher_loss": 0.24298998713493347 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.3954559862613678, + "learning_rate": 1.537949978314298e-05, + "loss": 0.2238, + "step": 3546, + "teacher_loss": 0.2047792226076126 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.23530299961566925, + "learning_rate": 1.5383836923521756e-05, + "loss": 0.2332, + "step": 3547, + "teacher_loss": 0.23301705718040466 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.38806092739105225, + "learning_rate": 1.5388174063900533e-05, + "loss": 0.2375, + "step": 3548, + "teacher_loss": 0.22077591717243195 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.2556641697883606, + "learning_rate": 1.539251120427931e-05, + "loss": 0.1874, + "step": 3549, + "teacher_loss": 0.17984139919281006 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.25484445691108704, + "learning_rate": 1.539684834465809e-05, + "loss": 0.1691, + "step": 3550, + "teacher_loss": 0.15962810814380646 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.8743991851806641, + "learning_rate": 1.5401185485036866e-05, + "loss": 0.5032, + "step": 3551, + "teacher_loss": 0.46190541982650757 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.4293147325515747, + "learning_rate": 1.5405522625415644e-05, + "loss": 0.2516, + "step": 3552, + "teacher_loss": 0.23189595341682434 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.5538841485977173, + "learning_rate": 1.540985976579442e-05, + "loss": 0.2314, + "step": 3553, + "teacher_loss": 0.19551338255405426 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.26218414306640625, + "learning_rate": 1.54141969061732e-05, + "loss": 0.2315, + "step": 3554, + "teacher_loss": 0.22808539867401123 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.3066752254962921, + "learning_rate": 1.5418534046551977e-05, + "loss": 0.2218, + "step": 3555, + "teacher_loss": 0.21231423318386078 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.2074851095676422, + "learning_rate": 1.542287118693075e-05, + "loss": 0.197, + "step": 3556, + "teacher_loss": 0.1958634853363037 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.4917834997177124, + "learning_rate": 1.5427208327309525e-05, + "loss": 0.4156, + "step": 3557, + "teacher_loss": 0.4071061611175537 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.33499565720558167, + "learning_rate": 1.5431545467688303e-05, + "loss": 0.1899, + "step": 3558, + "teacher_loss": 0.1737859547138214 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.2396567016839981, + "learning_rate": 1.543588260806708e-05, + "loss": 0.234, + "step": 3559, + "teacher_loss": 0.23334139585494995 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.32577937841415405, + "learning_rate": 1.5440219748445858e-05, + "loss": 0.3202, + "step": 3560, + "teacher_loss": 0.3196263909339905 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.5833309292793274, + "learning_rate": 1.5444556888824635e-05, + "loss": 0.2664, + "step": 3561, + "teacher_loss": 0.23121987283229828 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.21850012242794037, + "learning_rate": 1.5448894029203413e-05, + "loss": 0.2564, + "step": 3562, + "teacher_loss": 0.26059362292289734 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.9676458239555359, + "learning_rate": 1.545323116958219e-05, + "loss": 0.2854, + "step": 3563, + "teacher_loss": 0.20960737764835358 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.3769055902957916, + "learning_rate": 1.545756830996097e-05, + "loss": 0.2098, + "step": 3564, + "teacher_loss": 0.19120697677135468 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.3868241608142853, + "learning_rate": 1.5461905450339743e-05, + "loss": 0.2496, + "step": 3565, + "teacher_loss": 0.23430654406547546 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.6553928852081299, + "learning_rate": 1.546624259071852e-05, + "loss": 0.2624, + "step": 3566, + "teacher_loss": 0.21871358156204224 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.4719890356063843, + "learning_rate": 1.5470579731097298e-05, + "loss": 0.2315, + "step": 3567, + "teacher_loss": 0.20473061501979828 + }, + { + "compression_loss": 0.0, + "epoch": 0.64, + "label_loss": 0.5522358417510986, + "learning_rate": 1.5474916871476072e-05, + "loss": 0.4031, + "step": 3568, + "teacher_loss": 0.3865233063697815 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.20686452090740204, + "learning_rate": 1.547925401185485e-05, + "loss": 0.2233, + "step": 3569, + "teacher_loss": 0.2251274734735489 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.6643775701522827, + "learning_rate": 1.5483591152233627e-05, + "loss": 0.3061, + "step": 3570, + "teacher_loss": 0.2662498354911804 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.46865981817245483, + "learning_rate": 1.5487928292612405e-05, + "loss": 0.2499, + "step": 3571, + "teacher_loss": 0.2255779206752777 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.5450855493545532, + "learning_rate": 1.5492265432991183e-05, + "loss": 0.3315, + "step": 3572, + "teacher_loss": 0.30775558948516846 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.33823856711387634, + "learning_rate": 1.549660257336996e-05, + "loss": 0.2693, + "step": 3573, + "teacher_loss": 0.2616141736507416 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.7502836585044861, + "learning_rate": 1.5500939713748734e-05, + "loss": 0.3328, + "step": 3574, + "teacher_loss": 0.2864474952220917 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.4689588248729706, + "learning_rate": 1.5505276854127512e-05, + "loss": 0.2321, + "step": 3575, + "teacher_loss": 0.20576024055480957 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.6034872531890869, + "learning_rate": 1.550961399450629e-05, + "loss": 0.2155, + "step": 3576, + "teacher_loss": 0.17239147424697876 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.5122894048690796, + "learning_rate": 1.5513951134885067e-05, + "loss": 0.2647, + "step": 3577, + "teacher_loss": 0.2371799349784851 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.5640295743942261, + "learning_rate": 1.5518288275263845e-05, + "loss": 0.2973, + "step": 3578, + "teacher_loss": 0.267711877822876 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.30804014205932617, + "learning_rate": 1.552262541564262e-05, + "loss": 0.2113, + "step": 3579, + "teacher_loss": 0.20055457949638367 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.6013041734695435, + "learning_rate": 1.5526962556021397e-05, + "loss": 0.2765, + "step": 3580, + "teacher_loss": 0.2404562383890152 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.3161754012107849, + "learning_rate": 1.5531299696400174e-05, + "loss": 0.227, + "step": 3581, + "teacher_loss": 0.21703986823558807 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.3295535445213318, + "learning_rate": 1.553563683677895e-05, + "loss": 0.2378, + "step": 3582, + "teacher_loss": 0.2276374101638794 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 1.0853112936019897, + "learning_rate": 1.5539973977157726e-05, + "loss": 0.3025, + "step": 3583, + "teacher_loss": 0.21556207537651062 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.6198538541793823, + "learning_rate": 1.5544311117536504e-05, + "loss": 0.2453, + "step": 3584, + "teacher_loss": 0.20370802283287048 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.24094390869140625, + "learning_rate": 1.554864825791528e-05, + "loss": 0.206, + "step": 3585, + "teacher_loss": 0.2021379917860031 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.2509448528289795, + "learning_rate": 1.555298539829406e-05, + "loss": 0.155, + "step": 3586, + "teacher_loss": 0.1442955583333969 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.46481892466545105, + "learning_rate": 1.5557322538672837e-05, + "loss": 0.2167, + "step": 3587, + "teacher_loss": 0.18916043639183044 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.5572981238365173, + "learning_rate": 1.5561659679051614e-05, + "loss": 0.21, + "step": 3588, + "teacher_loss": 0.17144931852817535 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.8200153708457947, + "learning_rate": 1.5565996819430392e-05, + "loss": 0.2644, + "step": 3589, + "teacher_loss": 0.20262448489665985 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.8211898803710938, + "learning_rate": 1.5570333959809166e-05, + "loss": 0.2491, + "step": 3590, + "teacher_loss": 0.18556594848632812 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.3262423872947693, + "learning_rate": 1.557467110018794e-05, + "loss": 0.2848, + "step": 3591, + "teacher_loss": 0.2802233099937439 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.674504280090332, + "learning_rate": 1.5579008240566718e-05, + "loss": 0.2646, + "step": 3592, + "teacher_loss": 0.2190292775630951 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.484086275100708, + "learning_rate": 1.5583345380945496e-05, + "loss": 0.2361, + "step": 3593, + "teacher_loss": 0.20854628086090088 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.7004497647285461, + "learning_rate": 1.5587682521324273e-05, + "loss": 0.2778, + "step": 3594, + "teacher_loss": 0.23078963160514832 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.6153299808502197, + "learning_rate": 1.559201966170305e-05, + "loss": 0.3487, + "step": 3595, + "teacher_loss": 0.3191266357898712 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.3894866108894348, + "learning_rate": 1.559635680208183e-05, + "loss": 0.2691, + "step": 3596, + "teacher_loss": 0.2556788921356201 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.5176110863685608, + "learning_rate": 1.5600693942460606e-05, + "loss": 0.3439, + "step": 3597, + "teacher_loss": 0.324614018201828 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.6580513119697571, + "learning_rate": 1.5605031082839384e-05, + "loss": 0.3072, + "step": 3598, + "teacher_loss": 0.2681823968887329 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.3736642003059387, + "learning_rate": 1.560936822321816e-05, + "loss": 0.2576, + "step": 3599, + "teacher_loss": 0.2447434663772583 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.47863340377807617, + "learning_rate": 1.5613705363596936e-05, + "loss": 0.2055, + "step": 3600, + "teacher_loss": 0.17515403032302856 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.7348597645759583, + "learning_rate": 1.561804250397571e-05, + "loss": 0.292, + "step": 3601, + "teacher_loss": 0.24282091856002808 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.767880916595459, + "learning_rate": 1.5622379644354488e-05, + "loss": 0.3102, + "step": 3602, + "teacher_loss": 0.2593521475791931 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.4968601167201996, + "learning_rate": 1.5626716784733265e-05, + "loss": 0.2183, + "step": 3603, + "teacher_loss": 0.18730475008487701 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.2474328577518463, + "learning_rate": 1.5631053925112043e-05, + "loss": 0.2305, + "step": 3604, + "teacher_loss": 0.22860580682754517 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.2972242534160614, + "learning_rate": 1.563539106549082e-05, + "loss": 0.2602, + "step": 3605, + "teacher_loss": 0.25604140758514404 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.354631245136261, + "learning_rate": 1.5639728205869598e-05, + "loss": 0.277, + "step": 3606, + "teacher_loss": 0.2684188485145569 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.4142345190048218, + "learning_rate": 1.5644065346248376e-05, + "loss": 0.2955, + "step": 3607, + "teacher_loss": 0.28236067295074463 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.6902436017990112, + "learning_rate": 1.5648402486627153e-05, + "loss": 0.3126, + "step": 3608, + "teacher_loss": 0.2706823945045471 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.42645570635795593, + "learning_rate": 1.5652739627005927e-05, + "loss": 0.2462, + "step": 3609, + "teacher_loss": 0.22611650824546814 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.4952872693538666, + "learning_rate": 1.5657076767384705e-05, + "loss": 0.2736, + "step": 3610, + "teacher_loss": 0.24896839261054993 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.7140209674835205, + "learning_rate": 1.5661413907763483e-05, + "loss": 0.3263, + "step": 3611, + "teacher_loss": 0.28323644399642944 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.5983753204345703, + "learning_rate": 1.5665751048142257e-05, + "loss": 0.2586, + "step": 3612, + "teacher_loss": 0.22081002593040466 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.6172915697097778, + "learning_rate": 1.5670088188521035e-05, + "loss": 0.2629, + "step": 3613, + "teacher_loss": 0.2235327959060669 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.49808406829833984, + "learning_rate": 1.5674425328899812e-05, + "loss": 0.3195, + "step": 3614, + "teacher_loss": 0.29966092109680176 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.5055669546127319, + "learning_rate": 1.567876246927859e-05, + "loss": 0.2478, + "step": 3615, + "teacher_loss": 0.2191312611103058 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.45741939544677734, + "learning_rate": 1.5683099609657367e-05, + "loss": 0.3231, + "step": 3616, + "teacher_loss": 0.30820775032043457 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.5473480224609375, + "learning_rate": 1.5687436750036145e-05, + "loss": 0.3724, + "step": 3617, + "teacher_loss": 0.3529682755470276 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.2577945590019226, + "learning_rate": 1.569177389041492e-05, + "loss": 0.2059, + "step": 3618, + "teacher_loss": 0.2001163363456726 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.519547700881958, + "learning_rate": 1.5696111030793697e-05, + "loss": 0.3138, + "step": 3619, + "teacher_loss": 0.2909475862979889 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.6143239736557007, + "learning_rate": 1.5700448171172475e-05, + "loss": 0.3349, + "step": 3620, + "teacher_loss": 0.30390843749046326 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.7262749671936035, + "learning_rate": 1.5704785311551252e-05, + "loss": 0.2743, + "step": 3621, + "teacher_loss": 0.22408419847488403 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.491428017616272, + "learning_rate": 1.5709122451930026e-05, + "loss": 0.2906, + "step": 3622, + "teacher_loss": 0.2682454586029053 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.1648576259613037, + "learning_rate": 1.5713459592308804e-05, + "loss": 0.1881, + "step": 3623, + "teacher_loss": 0.1906564086675644 + }, + { + "compression_loss": 0.0, + "epoch": 0.65, + "label_loss": 0.45461952686309814, + "learning_rate": 1.571779673268758e-05, + "loss": 0.2144, + "step": 3624, + "teacher_loss": 0.18766288459300995 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.5948424339294434, + "learning_rate": 1.572213387306636e-05, + "loss": 0.2691, + "step": 3625, + "teacher_loss": 0.23289819061756134 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.5437354445457458, + "learning_rate": 1.5726471013445133e-05, + "loss": 0.2959, + "step": 3626, + "teacher_loss": 0.268393337726593 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.4256640076637268, + "learning_rate": 1.573080815382391e-05, + "loss": 0.2178, + "step": 3627, + "teacher_loss": 0.19470307230949402 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.6151233315467834, + "learning_rate": 1.573514529420269e-05, + "loss": 0.2822, + "step": 3628, + "teacher_loss": 0.24517272412776947 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.5243526697158813, + "learning_rate": 1.5739482434581466e-05, + "loss": 0.269, + "step": 3629, + "teacher_loss": 0.24066051840782166 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.32326650619506836, + "learning_rate": 1.5743819574960244e-05, + "loss": 0.1649, + "step": 3630, + "teacher_loss": 0.14732235670089722 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.48410725593566895, + "learning_rate": 1.574815671533902e-05, + "loss": 0.3168, + "step": 3631, + "teacher_loss": 0.2982001006603241 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.3797553777694702, + "learning_rate": 1.57524938557178e-05, + "loss": 0.1818, + "step": 3632, + "teacher_loss": 0.15977589786052704 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.42237675189971924, + "learning_rate": 1.5756830996096573e-05, + "loss": 0.3324, + "step": 3633, + "teacher_loss": 0.32245340943336487 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.1967628002166748, + "learning_rate": 1.576116813647535e-05, + "loss": 0.2138, + "step": 3634, + "teacher_loss": 0.21568498015403748 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.5049176216125488, + "learning_rate": 1.5765505276854125e-05, + "loss": 0.2319, + "step": 3635, + "teacher_loss": 0.20160309970378876 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.2041967511177063, + "learning_rate": 1.5769842417232903e-05, + "loss": 0.1761, + "step": 3636, + "teacher_loss": 0.17294825613498688 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.5666379928588867, + "learning_rate": 1.577417955761168e-05, + "loss": 0.2611, + "step": 3637, + "teacher_loss": 0.2271299809217453 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.8996134996414185, + "learning_rate": 1.5778516697990458e-05, + "loss": 0.3724, + "step": 3638, + "teacher_loss": 0.31387490034103394 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 1.3876512050628662, + "learning_rate": 1.5782853838369236e-05, + "loss": 0.4111, + "step": 3639, + "teacher_loss": 0.3026413917541504 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.637281596660614, + "learning_rate": 1.5787190978748013e-05, + "loss": 0.2142, + "step": 3640, + "teacher_loss": 0.16713929176330566 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.46090635657310486, + "learning_rate": 1.579152811912679e-05, + "loss": 0.2423, + "step": 3641, + "teacher_loss": 0.21799179911613464 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.35091036558151245, + "learning_rate": 1.579586525950557e-05, + "loss": 0.267, + "step": 3642, + "teacher_loss": 0.2576674818992615 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.2262699007987976, + "learning_rate": 1.5800202399884346e-05, + "loss": 0.2466, + "step": 3643, + "teacher_loss": 0.2488035261631012 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.346238374710083, + "learning_rate": 1.5804539540263117e-05, + "loss": 0.2416, + "step": 3644, + "teacher_loss": 0.22998301684856415 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.36392971873283386, + "learning_rate": 1.5808876680641895e-05, + "loss": 0.2901, + "step": 3645, + "teacher_loss": 0.281871497631073 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.514195442199707, + "learning_rate": 1.5813213821020672e-05, + "loss": 0.2338, + "step": 3646, + "teacher_loss": 0.20267260074615479 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.3243282437324524, + "learning_rate": 1.581755096139945e-05, + "loss": 0.2267, + "step": 3647, + "teacher_loss": 0.21585705876350403 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.5454914569854736, + "learning_rate": 1.5821888101778228e-05, + "loss": 0.3048, + "step": 3648, + "teacher_loss": 0.27808240056037903 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.4907207489013672, + "learning_rate": 1.5826225242157005e-05, + "loss": 0.1717, + "step": 3649, + "teacher_loss": 0.13626837730407715 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.49814796447753906, + "learning_rate": 1.5830562382535783e-05, + "loss": 0.2262, + "step": 3650, + "teacher_loss": 0.195997953414917 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.3613089323043823, + "learning_rate": 1.583489952291456e-05, + "loss": 0.1965, + "step": 3651, + "teacher_loss": 0.17814019322395325 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.3954395651817322, + "learning_rate": 1.5839236663293338e-05, + "loss": 0.2655, + "step": 3652, + "teacher_loss": 0.251077800989151 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.5293263792991638, + "learning_rate": 1.5843573803672112e-05, + "loss": 0.2742, + "step": 3653, + "teacher_loss": 0.24580731987953186 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.39331650733947754, + "learning_rate": 1.584791094405089e-05, + "loss": 0.2385, + "step": 3654, + "teacher_loss": 0.22134628891944885 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.28252631425857544, + "learning_rate": 1.5852248084429664e-05, + "loss": 0.2135, + "step": 3655, + "teacher_loss": 0.20582029223442078 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 1.1032443046569824, + "learning_rate": 1.5856585224808442e-05, + "loss": 0.3304, + "step": 3656, + "teacher_loss": 0.24451056122779846 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.19120267033576965, + "learning_rate": 1.586092236518722e-05, + "loss": 0.1815, + "step": 3657, + "teacher_loss": 0.1804046928882599 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.3292529582977295, + "learning_rate": 1.5865259505565997e-05, + "loss": 0.3004, + "step": 3658, + "teacher_loss": 0.2971589267253876 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.5031317472457886, + "learning_rate": 1.5869596645944775e-05, + "loss": 0.3029, + "step": 3659, + "teacher_loss": 0.2806471884250641 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.3349095582962036, + "learning_rate": 1.5873933786323552e-05, + "loss": 0.1937, + "step": 3660, + "teacher_loss": 0.17799094319343567 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.9139611721038818, + "learning_rate": 1.587827092670233e-05, + "loss": 0.3291, + "step": 3661, + "teacher_loss": 0.2641031742095947 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.6306778788566589, + "learning_rate": 1.5882608067081104e-05, + "loss": 0.4089, + "step": 3662, + "teacher_loss": 0.3842778503894806 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.29057514667510986, + "learning_rate": 1.5886945207459882e-05, + "loss": 0.2828, + "step": 3663, + "teacher_loss": 0.28189635276794434 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.40674033761024475, + "learning_rate": 1.589128234783866e-05, + "loss": 0.2813, + "step": 3664, + "teacher_loss": 0.2673141360282898 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.4503912031650543, + "learning_rate": 1.5895619488217437e-05, + "loss": 0.2392, + "step": 3665, + "teacher_loss": 0.21576957404613495 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.7979863882064819, + "learning_rate": 1.589995662859621e-05, + "loss": 0.2841, + "step": 3666, + "teacher_loss": 0.2270311415195465 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.3613271415233612, + "learning_rate": 1.590429376897499e-05, + "loss": 0.2886, + "step": 3667, + "teacher_loss": 0.28050917387008667 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.276175856590271, + "learning_rate": 1.5908630909353767e-05, + "loss": 0.2573, + "step": 3668, + "teacher_loss": 0.2552304267883301 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.17194274067878723, + "learning_rate": 1.5912968049732544e-05, + "loss": 0.1979, + "step": 3669, + "teacher_loss": 0.20079147815704346 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.43235665559768677, + "learning_rate": 1.591730519011132e-05, + "loss": 0.1966, + "step": 3670, + "teacher_loss": 0.1704035997390747 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.5696755647659302, + "learning_rate": 1.5921642330490096e-05, + "loss": 0.2245, + "step": 3671, + "teacher_loss": 0.18612641096115112 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.3324525058269501, + "learning_rate": 1.5925979470868874e-05, + "loss": 0.2732, + "step": 3672, + "teacher_loss": 0.2666308283805847 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.24366751313209534, + "learning_rate": 1.593031661124765e-05, + "loss": 0.2588, + "step": 3673, + "teacher_loss": 0.2605050504207611 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.3631001114845276, + "learning_rate": 1.593465375162643e-05, + "loss": 0.2284, + "step": 3674, + "teacher_loss": 0.2134379893541336 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.31601378321647644, + "learning_rate": 1.5938990892005206e-05, + "loss": 0.2063, + "step": 3675, + "teacher_loss": 0.19411563873291016 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.24740441143512726, + "learning_rate": 1.5943328032383984e-05, + "loss": 0.2328, + "step": 3676, + "teacher_loss": 0.2312251478433609 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.496989905834198, + "learning_rate": 1.594766517276276e-05, + "loss": 0.3291, + "step": 3677, + "teacher_loss": 0.31045952439308167 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.22733494639396667, + "learning_rate": 1.5952002313141536e-05, + "loss": 0.2471, + "step": 3678, + "teacher_loss": 0.24927252531051636 + }, + { + "compression_loss": 0.0, + "epoch": 0.66, + "label_loss": 0.23540058732032776, + "learning_rate": 1.595633945352031e-05, + "loss": 0.2549, + "step": 3679, + "teacher_loss": 0.2570544481277466 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.40414029359817505, + "learning_rate": 1.5960676593899088e-05, + "loss": 0.2912, + "step": 3680, + "teacher_loss": 0.27861422300338745 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.5732398629188538, + "learning_rate": 1.5965013734277865e-05, + "loss": 0.349, + "step": 3681, + "teacher_loss": 0.3240451514720917 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.29784345626831055, + "learning_rate": 1.5969350874656643e-05, + "loss": 0.2169, + "step": 3682, + "teacher_loss": 0.20796120166778564 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.3988625407218933, + "learning_rate": 1.597368801503542e-05, + "loss": 0.2253, + "step": 3683, + "teacher_loss": 0.2060111165046692 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.3626754879951477, + "learning_rate": 1.5978025155414198e-05, + "loss": 0.1892, + "step": 3684, + "teacher_loss": 0.16995102167129517 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.17476633191108704, + "learning_rate": 1.5982362295792976e-05, + "loss": 0.1658, + "step": 3685, + "teacher_loss": 0.16474947333335876 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 1.1083035469055176, + "learning_rate": 1.5986699436171754e-05, + "loss": 0.339, + "step": 3686, + "teacher_loss": 0.2535526752471924 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.5120720267295837, + "learning_rate": 1.599103657655053e-05, + "loss": 0.2415, + "step": 3687, + "teacher_loss": 0.21138286590576172 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.31147801876068115, + "learning_rate": 1.5995373716929302e-05, + "loss": 0.2128, + "step": 3688, + "teacher_loss": 0.20180727541446686 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.2520235478878021, + "learning_rate": 1.599971085730808e-05, + "loss": 0.237, + "step": 3689, + "teacher_loss": 0.2353190779685974 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.6757015585899353, + "learning_rate": 1.6004047997686857e-05, + "loss": 0.2932, + "step": 3690, + "teacher_loss": 0.2507269084453583 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.32675647735595703, + "learning_rate": 1.6008385138065635e-05, + "loss": 0.2994, + "step": 3691, + "teacher_loss": 0.29632702469825745 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.49801209568977356, + "learning_rate": 1.6012722278444413e-05, + "loss": 0.303, + "step": 3692, + "teacher_loss": 0.28133079409599304 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.18499904870986938, + "learning_rate": 1.601705941882319e-05, + "loss": 0.2106, + "step": 3693, + "teacher_loss": 0.21348942816257477 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.43742239475250244, + "learning_rate": 1.6021396559201968e-05, + "loss": 0.2656, + "step": 3694, + "teacher_loss": 0.24650748074054718 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.47437185049057007, + "learning_rate": 1.6025733699580745e-05, + "loss": 0.2556, + "step": 3695, + "teacher_loss": 0.23133578896522522 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.28144145011901855, + "learning_rate": 1.6030070839959523e-05, + "loss": 0.2216, + "step": 3696, + "teacher_loss": 0.21491554379463196 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.698313295841217, + "learning_rate": 1.6034407980338297e-05, + "loss": 0.2697, + "step": 3697, + "teacher_loss": 0.22211724519729614 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.23900443315505981, + "learning_rate": 1.6038745120717075e-05, + "loss": 0.2505, + "step": 3698, + "teacher_loss": 0.2517843246459961 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.4733828008174896, + "learning_rate": 1.604308226109585e-05, + "loss": 0.2223, + "step": 3699, + "teacher_loss": 0.19442662596702576 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.6261241436004639, + "learning_rate": 1.6047419401474627e-05, + "loss": 0.3006, + "step": 3700, + "teacher_loss": 0.26447755098342896 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.7924472093582153, + "learning_rate": 1.6051756541853404e-05, + "loss": 0.352, + "step": 3701, + "teacher_loss": 0.3030155301094055 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.31122374534606934, + "learning_rate": 1.6056093682232182e-05, + "loss": 0.204, + "step": 3702, + "teacher_loss": 0.19211140275001526 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.428591251373291, + "learning_rate": 1.606043082261096e-05, + "loss": 0.2274, + "step": 3703, + "teacher_loss": 0.2050548642873764 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.4345467686653137, + "learning_rate": 1.6064767962989737e-05, + "loss": 0.1722, + "step": 3704, + "teacher_loss": 0.143045574426651 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.37999799847602844, + "learning_rate": 1.6069105103368515e-05, + "loss": 0.229, + "step": 3705, + "teacher_loss": 0.21225020289421082 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.27530303597450256, + "learning_rate": 1.607344224374729e-05, + "loss": 0.1973, + "step": 3706, + "teacher_loss": 0.18861901760101318 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.08717583864927292, + "learning_rate": 1.6077779384126067e-05, + "loss": 0.2106, + "step": 3707, + "teacher_loss": 0.2242676317691803 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.27346599102020264, + "learning_rate": 1.6082116524504844e-05, + "loss": 0.2096, + "step": 3708, + "teacher_loss": 0.20248793065547943 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.29856640100479126, + "learning_rate": 1.6086453664883622e-05, + "loss": 0.2314, + "step": 3709, + "teacher_loss": 0.2239629030227661 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.23751837015151978, + "learning_rate": 1.6090790805262396e-05, + "loss": 0.179, + "step": 3710, + "teacher_loss": 0.17245493829250336 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.24449267983436584, + "learning_rate": 1.6095127945641174e-05, + "loss": 0.1353, + "step": 3711, + "teacher_loss": 0.12312253564596176 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.22178930044174194, + "learning_rate": 1.609946508601995e-05, + "loss": 0.2283, + "step": 3712, + "teacher_loss": 0.22905665636062622 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 1.222865104675293, + "learning_rate": 1.610380222639873e-05, + "loss": 0.8242, + "step": 3713, + "teacher_loss": 0.7799502611160278 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.44414788484573364, + "learning_rate": 1.6108139366777507e-05, + "loss": 0.2449, + "step": 3714, + "teacher_loss": 0.22273987531661987 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.4587915539741516, + "learning_rate": 1.611247650715628e-05, + "loss": 0.2597, + "step": 3715, + "teacher_loss": 0.23758479952812195 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.21698248386383057, + "learning_rate": 1.611681364753506e-05, + "loss": 0.1828, + "step": 3716, + "teacher_loss": 0.1790435016155243 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.4231051206588745, + "learning_rate": 1.6121150787913836e-05, + "loss": 0.2469, + "step": 3717, + "teacher_loss": 0.2272929549217224 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.6544105410575867, + "learning_rate": 1.6125487928292614e-05, + "loss": 0.2112, + "step": 3718, + "teacher_loss": 0.1619209498167038 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.5293720364570618, + "learning_rate": 1.612982506867139e-05, + "loss": 0.2618, + "step": 3719, + "teacher_loss": 0.23203915357589722 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.3657592535018921, + "learning_rate": 1.6134162209050166e-05, + "loss": 0.2251, + "step": 3720, + "teacher_loss": 0.20950651168823242 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.5583572387695312, + "learning_rate": 1.6138499349428943e-05, + "loss": 0.4385, + "step": 3721, + "teacher_loss": 0.42514514923095703 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.38378602266311646, + "learning_rate": 1.614283648980772e-05, + "loss": 0.2203, + "step": 3722, + "teacher_loss": 0.2021283209323883 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.8183550834655762, + "learning_rate": 1.6147173630186495e-05, + "loss": 0.4684, + "step": 3723, + "teacher_loss": 0.42947232723236084 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.3264024257659912, + "learning_rate": 1.6151510770565273e-05, + "loss": 0.2316, + "step": 3724, + "teacher_loss": 0.22109563648700714 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.6472335457801819, + "learning_rate": 1.615584791094405e-05, + "loss": 0.3666, + "step": 3725, + "teacher_loss": 0.33537721633911133 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.27490532398223877, + "learning_rate": 1.6160185051322828e-05, + "loss": 0.2683, + "step": 3726, + "teacher_loss": 0.26755839586257935 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.8450971245765686, + "learning_rate": 1.6164522191701606e-05, + "loss": 0.2502, + "step": 3727, + "teacher_loss": 0.18413874506950378 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.5311121940612793, + "learning_rate": 1.6168859332080383e-05, + "loss": 0.3263, + "step": 3728, + "teacher_loss": 0.30357107520103455 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.3031958341598511, + "learning_rate": 1.617319647245916e-05, + "loss": 0.2363, + "step": 3729, + "teacher_loss": 0.2288408875465393 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.3416759669780731, + "learning_rate": 1.617753361283794e-05, + "loss": 0.1788, + "step": 3730, + "teacher_loss": 0.16065427660942078 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.4747071862220764, + "learning_rate": 1.6181870753216713e-05, + "loss": 0.2281, + "step": 3731, + "teacher_loss": 0.2007313072681427 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.5879024863243103, + "learning_rate": 1.6186207893595487e-05, + "loss": 0.2583, + "step": 3732, + "teacher_loss": 0.22163701057434082 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.34162527322769165, + "learning_rate": 1.6190545033974265e-05, + "loss": 0.2115, + "step": 3733, + "teacher_loss": 0.19700568914413452 + }, + { + "compression_loss": 0.0, + "epoch": 0.67, + "label_loss": 0.24916669726371765, + "learning_rate": 1.6194882174353042e-05, + "loss": 0.3292, + "step": 3734, + "teacher_loss": 0.3380778431892395 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.5221686363220215, + "learning_rate": 1.619921931473182e-05, + "loss": 0.2508, + "step": 3735, + "teacher_loss": 0.22060123085975647 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.2968100309371948, + "learning_rate": 1.6203556455110597e-05, + "loss": 0.2629, + "step": 3736, + "teacher_loss": 0.2591715455055237 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.17533570528030396, + "learning_rate": 1.6207893595489375e-05, + "loss": 0.2154, + "step": 3737, + "teacher_loss": 0.21987247467041016 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.5514644384384155, + "learning_rate": 1.6212230735868153e-05, + "loss": 0.2708, + "step": 3738, + "teacher_loss": 0.23958361148834229 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.2269068956375122, + "learning_rate": 1.621656787624693e-05, + "loss": 0.2207, + "step": 3739, + "teacher_loss": 0.22006107866764069 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.2766556441783905, + "learning_rate": 1.6220905016625708e-05, + "loss": 0.2143, + "step": 3740, + "teacher_loss": 0.20733410120010376 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.3741043210029602, + "learning_rate": 1.6225242157004482e-05, + "loss": 0.2323, + "step": 3741, + "teacher_loss": 0.21649107336997986 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.5447278618812561, + "learning_rate": 1.6229579297383256e-05, + "loss": 0.2957, + "step": 3742, + "teacher_loss": 0.26798418164253235 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.8393168449401855, + "learning_rate": 1.6233916437762034e-05, + "loss": 0.273, + "step": 3743, + "teacher_loss": 0.21010853350162506 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.4853748679161072, + "learning_rate": 1.623825357814081e-05, + "loss": 0.2402, + "step": 3744, + "teacher_loss": 0.21292205154895782 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.5697771310806274, + "learning_rate": 1.624259071851959e-05, + "loss": 0.2511, + "step": 3745, + "teacher_loss": 0.21569518744945526 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.34384316205978394, + "learning_rate": 1.6246927858898367e-05, + "loss": 0.2566, + "step": 3746, + "teacher_loss": 0.24689540266990662 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.3202267587184906, + "learning_rate": 1.6251264999277144e-05, + "loss": 0.2682, + "step": 3747, + "teacher_loss": 0.2624393701553345 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.6127591729164124, + "learning_rate": 1.6255602139655922e-05, + "loss": 0.274, + "step": 3748, + "teacher_loss": 0.23634150624275208 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.5328792929649353, + "learning_rate": 1.62599392800347e-05, + "loss": 0.2923, + "step": 3749, + "teacher_loss": 0.2655397057533264 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.2766993045806885, + "learning_rate": 1.6264276420413474e-05, + "loss": 0.2154, + "step": 3750, + "teacher_loss": 0.20853738486766815 + }, + { + "epoch": 0.68, + "eval_exact_match": 79.80132450331126, + "eval_f1": 87.25872130468825, + "step": 3750 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.5422698259353638, + "learning_rate": 1.626861356079225e-05, + "loss": 0.3071, + "step": 3751, + "teacher_loss": 0.28099894523620605 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.18106874823570251, + "learning_rate": 1.627295070117103e-05, + "loss": 0.1843, + "step": 3752, + "teacher_loss": 0.184663325548172 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.4139200448989868, + "learning_rate": 1.6277287841549803e-05, + "loss": 0.2942, + "step": 3753, + "teacher_loss": 0.2808450758457184 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.3906204104423523, + "learning_rate": 1.628162498192858e-05, + "loss": 0.207, + "step": 3754, + "teacher_loss": 0.18658968806266785 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.5382830500602722, + "learning_rate": 1.628596212230736e-05, + "loss": 0.2549, + "step": 3755, + "teacher_loss": 0.22344517707824707 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.7961368560791016, + "learning_rate": 1.6290299262686136e-05, + "loss": 0.2384, + "step": 3756, + "teacher_loss": 0.17644289135932922 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.2446788251399994, + "learning_rate": 1.6294636403064914e-05, + "loss": 0.1743, + "step": 3757, + "teacher_loss": 0.16648009419441223 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.2495914101600647, + "learning_rate": 1.629897354344369e-05, + "loss": 0.3013, + "step": 3758, + "teacher_loss": 0.30702510476112366 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.3456244468688965, + "learning_rate": 1.6303310683822466e-05, + "loss": 0.215, + "step": 3759, + "teacher_loss": 0.2005200833082199 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.7934640645980835, + "learning_rate": 1.6307647824201243e-05, + "loss": 0.265, + "step": 3760, + "teacher_loss": 0.2062867283821106 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.6742236614227295, + "learning_rate": 1.631198496458002e-05, + "loss": 0.3627, + "step": 3761, + "teacher_loss": 0.3280717134475708 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.8747836351394653, + "learning_rate": 1.63163221049588e-05, + "loss": 0.3633, + "step": 3762, + "teacher_loss": 0.30644142627716064 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.2645156681537628, + "learning_rate": 1.6320659245337576e-05, + "loss": 0.2345, + "step": 3763, + "teacher_loss": 0.23113486170768738 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 1.065383791923523, + "learning_rate": 1.632499638571635e-05, + "loss": 0.4306, + "step": 3764, + "teacher_loss": 0.3600945472717285 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.49690085649490356, + "learning_rate": 1.6329333526095128e-05, + "loss": 0.2564, + "step": 3765, + "teacher_loss": 0.2296697348356247 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.42714372277259827, + "learning_rate": 1.6333670666473906e-05, + "loss": 0.4209, + "step": 3766, + "teacher_loss": 0.4202342629432678 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.8831572532653809, + "learning_rate": 1.633800780685268e-05, + "loss": 0.2951, + "step": 3767, + "teacher_loss": 0.22979632019996643 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.6498720049858093, + "learning_rate": 1.6342344947231458e-05, + "loss": 0.3911, + "step": 3768, + "teacher_loss": 0.36231428384780884 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.7952154874801636, + "learning_rate": 1.6346682087610235e-05, + "loss": 0.2841, + "step": 3769, + "teacher_loss": 0.22733429074287415 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.5044256448745728, + "learning_rate": 1.6351019227989013e-05, + "loss": 0.2529, + "step": 3770, + "teacher_loss": 0.22497303783893585 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.6302840709686279, + "learning_rate": 1.635535636836779e-05, + "loss": 0.3205, + "step": 3771, + "teacher_loss": 0.2860368490219116 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.46884235739707947, + "learning_rate": 1.6359693508746568e-05, + "loss": 0.2669, + "step": 3772, + "teacher_loss": 0.24443745613098145 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.2000223994255066, + "learning_rate": 1.6364030649125346e-05, + "loss": 0.1582, + "step": 3773, + "teacher_loss": 0.15359479188919067 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.6544150114059448, + "learning_rate": 1.6368367789504123e-05, + "loss": 0.3086, + "step": 3774, + "teacher_loss": 0.2701876759529114 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.4003007113933563, + "learning_rate": 1.6372704929882898e-05, + "loss": 0.2441, + "step": 3775, + "teacher_loss": 0.22669926285743713 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.6334882974624634, + "learning_rate": 1.6377042070261672e-05, + "loss": 0.2944, + "step": 3776, + "teacher_loss": 0.2566993832588196 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.49278002977371216, + "learning_rate": 1.638137921064045e-05, + "loss": 0.2544, + "step": 3777, + "teacher_loss": 0.2279680073261261 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.24746474623680115, + "learning_rate": 1.6385716351019227e-05, + "loss": 0.2567, + "step": 3778, + "teacher_loss": 0.257731556892395 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.43695637583732605, + "learning_rate": 1.6390053491398005e-05, + "loss": 0.2379, + "step": 3779, + "teacher_loss": 0.2158331274986267 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.3419543206691742, + "learning_rate": 1.6394390631776782e-05, + "loss": 0.2232, + "step": 3780, + "teacher_loss": 0.21001383662223816 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.47850456833839417, + "learning_rate": 1.639872777215556e-05, + "loss": 0.3036, + "step": 3781, + "teacher_loss": 0.2841114401817322 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.5205943584442139, + "learning_rate": 1.6403064912534338e-05, + "loss": 0.4213, + "step": 3782, + "teacher_loss": 0.4102362096309662 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.3042946457862854, + "learning_rate": 1.6407402052913115e-05, + "loss": 0.2381, + "step": 3783, + "teacher_loss": 0.2307826578617096 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.14177286624908447, + "learning_rate": 1.6411739193291893e-05, + "loss": 0.2254, + "step": 3784, + "teacher_loss": 0.23466037213802338 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.27628758549690247, + "learning_rate": 1.6416076333670667e-05, + "loss": 0.235, + "step": 3785, + "teacher_loss": 0.23043525218963623 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.5149080753326416, + "learning_rate": 1.642041347404944e-05, + "loss": 0.2615, + "step": 3786, + "teacher_loss": 0.2333323061466217 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.4056503176689148, + "learning_rate": 1.642475061442822e-05, + "loss": 0.2658, + "step": 3787, + "teacher_loss": 0.250255823135376 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.37076690793037415, + "learning_rate": 1.6429087754806996e-05, + "loss": 0.2991, + "step": 3788, + "teacher_loss": 0.29112833738327026 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.6789429187774658, + "learning_rate": 1.6433424895185774e-05, + "loss": 0.2476, + "step": 3789, + "teacher_loss": 0.1996225267648697 + }, + { + "compression_loss": 0.0, + "epoch": 0.68, + "label_loss": 0.35509172081947327, + "learning_rate": 1.6437762035564552e-05, + "loss": 0.2053, + "step": 3790, + "teacher_loss": 0.1886591911315918 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.29993876814842224, + "learning_rate": 1.644209917594333e-05, + "loss": 0.2615, + "step": 3791, + "teacher_loss": 0.2572597861289978 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.27062827348709106, + "learning_rate": 1.6446436316322107e-05, + "loss": 0.2046, + "step": 3792, + "teacher_loss": 0.19727930426597595 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.4060091972351074, + "learning_rate": 1.6450773456700885e-05, + "loss": 0.2204, + "step": 3793, + "teacher_loss": 0.19974222779273987 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.3982829451560974, + "learning_rate": 1.645511059707966e-05, + "loss": 0.2433, + "step": 3794, + "teacher_loss": 0.22608813643455505 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.5104777812957764, + "learning_rate": 1.6459447737458436e-05, + "loss": 0.2382, + "step": 3795, + "teacher_loss": 0.2079637348651886 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.19846825301647186, + "learning_rate": 1.6463784877837214e-05, + "loss": 0.2074, + "step": 3796, + "teacher_loss": 0.20840394496917725 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.32102543115615845, + "learning_rate": 1.6468122018215988e-05, + "loss": 0.226, + "step": 3797, + "teacher_loss": 0.21544566750526428 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.2557727098464966, + "learning_rate": 1.6472459158594766e-05, + "loss": 0.218, + "step": 3798, + "teacher_loss": 0.21380352973937988 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.33325809240341187, + "learning_rate": 1.6476796298973544e-05, + "loss": 0.2319, + "step": 3799, + "teacher_loss": 0.2206037938594818 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.7315468788146973, + "learning_rate": 1.648113343935232e-05, + "loss": 0.3161, + "step": 3800, + "teacher_loss": 0.2699926495552063 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.4144538938999176, + "learning_rate": 1.64854705797311e-05, + "loss": 0.212, + "step": 3801, + "teacher_loss": 0.1895418018102646 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.2488737404346466, + "learning_rate": 1.6489807720109876e-05, + "loss": 0.1737, + "step": 3802, + "teacher_loss": 0.16532915830612183 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.3568645119667053, + "learning_rate": 1.649414486048865e-05, + "loss": 0.1975, + "step": 3803, + "teacher_loss": 0.1798364520072937 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.5763231515884399, + "learning_rate": 1.6498482000867428e-05, + "loss": 0.2668, + "step": 3804, + "teacher_loss": 0.23243044316768646 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.7315685153007507, + "learning_rate": 1.6502819141246206e-05, + "loss": 0.2342, + "step": 3805, + "teacher_loss": 0.1789829134941101 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.40255996584892273, + "learning_rate": 1.6507156281624983e-05, + "loss": 0.2568, + "step": 3806, + "teacher_loss": 0.24063995480537415 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.49115222692489624, + "learning_rate": 1.651149342200376e-05, + "loss": 0.2241, + "step": 3807, + "teacher_loss": 0.1944597363471985 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.24985545873641968, + "learning_rate": 1.6515830562382535e-05, + "loss": 0.2956, + "step": 3808, + "teacher_loss": 0.30067986249923706 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.17164045572280884, + "learning_rate": 1.6520167702761313e-05, + "loss": 0.1657, + "step": 3809, + "teacher_loss": 0.16505427658557892 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.5039825439453125, + "learning_rate": 1.652450484314009e-05, + "loss": 0.2691, + "step": 3810, + "teacher_loss": 0.2430124431848526 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.34272128343582153, + "learning_rate": 1.6528841983518865e-05, + "loss": 0.2189, + "step": 3811, + "teacher_loss": 0.20516183972358704 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.36600738763809204, + "learning_rate": 1.6533179123897642e-05, + "loss": 0.2262, + "step": 3812, + "teacher_loss": 0.21065667271614075 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.5470266342163086, + "learning_rate": 1.653751626427642e-05, + "loss": 0.3718, + "step": 3813, + "teacher_loss": 0.3523100018501282 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.27601104974746704, + "learning_rate": 1.6541853404655198e-05, + "loss": 0.1888, + "step": 3814, + "teacher_loss": 0.17915800213813782 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.37931209802627563, + "learning_rate": 1.6546190545033975e-05, + "loss": 0.2098, + "step": 3815, + "teacher_loss": 0.19093045592308044 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.3809993267059326, + "learning_rate": 1.6550527685412753e-05, + "loss": 0.2825, + "step": 3816, + "teacher_loss": 0.27150198817253113 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.4369770288467407, + "learning_rate": 1.655486482579153e-05, + "loss": 0.2191, + "step": 3817, + "teacher_loss": 0.19484980404376984 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.39471060037612915, + "learning_rate": 1.6559201966170305e-05, + "loss": 0.2919, + "step": 3818, + "teacher_loss": 0.2804563045501709 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.5885171294212341, + "learning_rate": 1.6563539106549082e-05, + "loss": 0.2662, + "step": 3819, + "teacher_loss": 0.2304316759109497 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.3859420120716095, + "learning_rate": 1.6567876246927857e-05, + "loss": 0.2486, + "step": 3820, + "teacher_loss": 0.2333768606185913 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.8798807859420776, + "learning_rate": 1.6572213387306634e-05, + "loss": 0.2929, + "step": 3821, + "teacher_loss": 0.22762683033943176 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.2645268738269806, + "learning_rate": 1.6576550527685412e-05, + "loss": 0.14, + "step": 3822, + "teacher_loss": 0.1261518895626068 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.2211216688156128, + "learning_rate": 1.658088766806419e-05, + "loss": 0.2434, + "step": 3823, + "teacher_loss": 0.24587324261665344 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.20005196332931519, + "learning_rate": 1.6585224808442967e-05, + "loss": 0.215, + "step": 3824, + "teacher_loss": 0.2166530042886734 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.33525094389915466, + "learning_rate": 1.6589561948821745e-05, + "loss": 0.1988, + "step": 3825, + "teacher_loss": 0.18364958465099335 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.6016664505004883, + "learning_rate": 1.6593899089200522e-05, + "loss": 0.2154, + "step": 3826, + "teacher_loss": 0.17246496677398682 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.6605978608131409, + "learning_rate": 1.65982362295793e-05, + "loss": 0.3174, + "step": 3827, + "teacher_loss": 0.2792982757091522 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.8585014343261719, + "learning_rate": 1.6602573369958078e-05, + "loss": 0.333, + "step": 3828, + "teacher_loss": 0.27466362714767456 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.3495725989341736, + "learning_rate": 1.660691051033685e-05, + "loss": 0.1991, + "step": 3829, + "teacher_loss": 0.1823853999376297 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 1.454666018486023, + "learning_rate": 1.6611247650715626e-05, + "loss": 0.377, + "step": 3830, + "teacher_loss": 0.2572720944881439 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.5205580592155457, + "learning_rate": 1.6615584791094404e-05, + "loss": 0.3385, + "step": 3831, + "teacher_loss": 0.31829530000686646 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.279083788394928, + "learning_rate": 1.661992193147318e-05, + "loss": 0.2292, + "step": 3832, + "teacher_loss": 0.22361242771148682 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.5379554629325867, + "learning_rate": 1.662425907185196e-05, + "loss": 0.3581, + "step": 3833, + "teacher_loss": 0.33814409375190735 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.4368254244327545, + "learning_rate": 1.6628596212230737e-05, + "loss": 0.2082, + "step": 3834, + "teacher_loss": 0.1828201562166214 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.39830639958381653, + "learning_rate": 1.6632933352609514e-05, + "loss": 0.2783, + "step": 3835, + "teacher_loss": 0.2650046944618225 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.24708837270736694, + "learning_rate": 1.6637270492988292e-05, + "loss": 0.2415, + "step": 3836, + "teacher_loss": 0.24087324738502502 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.3038734197616577, + "learning_rate": 1.664160763336707e-05, + "loss": 0.2703, + "step": 3837, + "teacher_loss": 0.26660460233688354 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.3258134126663208, + "learning_rate": 1.6645944773745844e-05, + "loss": 0.3155, + "step": 3838, + "teacher_loss": 0.31432080268859863 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.25600236654281616, + "learning_rate": 1.665028191412462e-05, + "loss": 0.3679, + "step": 3839, + "teacher_loss": 0.3803739547729492 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.31062108278274536, + "learning_rate": 1.6654619054503396e-05, + "loss": 0.2346, + "step": 3840, + "teacher_loss": 0.22618205845355988 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.4096793532371521, + "learning_rate": 1.6658956194882173e-05, + "loss": 0.2591, + "step": 3841, + "teacher_loss": 0.24235542118549347 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.39860469102859497, + "learning_rate": 1.666329333526095e-05, + "loss": 0.2758, + "step": 3842, + "teacher_loss": 0.2621324062347412 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.9226051568984985, + "learning_rate": 1.666763047563973e-05, + "loss": 0.3211, + "step": 3843, + "teacher_loss": 0.2542799115180969 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.5054365992546082, + "learning_rate": 1.6671967616018506e-05, + "loss": 0.209, + "step": 3844, + "teacher_loss": 0.17602741718292236 + }, + { + "compression_loss": 0.0, + "epoch": 0.69, + "label_loss": 0.29463323950767517, + "learning_rate": 1.6676304756397284e-05, + "loss": 0.2589, + "step": 3845, + "teacher_loss": 0.25491857528686523 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.5439555644989014, + "learning_rate": 1.668064189677606e-05, + "loss": 0.2678, + "step": 3846, + "teacher_loss": 0.2370859682559967 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.44368356466293335, + "learning_rate": 1.6684979037154836e-05, + "loss": 0.2363, + "step": 3847, + "teacher_loss": 0.2132757157087326 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.5373855829238892, + "learning_rate": 1.6689316177533613e-05, + "loss": 0.2403, + "step": 3848, + "teacher_loss": 0.2072800248861313 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.1720067858695984, + "learning_rate": 1.669365331791239e-05, + "loss": 0.1374, + "step": 3849, + "teacher_loss": 0.1335633397102356 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.36526960134506226, + "learning_rate": 1.669799045829117e-05, + "loss": 0.2658, + "step": 3850, + "teacher_loss": 0.25472697615623474 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.3932795226573944, + "learning_rate": 1.6702327598669943e-05, + "loss": 0.2397, + "step": 3851, + "teacher_loss": 0.22262638807296753 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.5964947938919067, + "learning_rate": 1.670666473904872e-05, + "loss": 0.3338, + "step": 3852, + "teacher_loss": 0.30466634035110474 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.3706209063529968, + "learning_rate": 1.6711001879427498e-05, + "loss": 0.2018, + "step": 3853, + "teacher_loss": 0.18306578695774078 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.5303115844726562, + "learning_rate": 1.6715339019806275e-05, + "loss": 0.2795, + "step": 3854, + "teacher_loss": 0.25157755613327026 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.5843782424926758, + "learning_rate": 1.6719676160185053e-05, + "loss": 0.2644, + "step": 3855, + "teacher_loss": 0.22886034846305847 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.3444828987121582, + "learning_rate": 1.6724013300563827e-05, + "loss": 0.3409, + "step": 3856, + "teacher_loss": 0.34050512313842773 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.27396342158317566, + "learning_rate": 1.6728350440942605e-05, + "loss": 0.1724, + "step": 3857, + "teacher_loss": 0.16109535098075867 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.6663780212402344, + "learning_rate": 1.6732687581321383e-05, + "loss": 0.4939, + "step": 3858, + "teacher_loss": 0.47471052408218384 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.48984646797180176, + "learning_rate": 1.673702472170016e-05, + "loss": 0.229, + "step": 3859, + "teacher_loss": 0.20005908608436584 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.6088159680366516, + "learning_rate": 1.6741361862078938e-05, + "loss": 0.2461, + "step": 3860, + "teacher_loss": 0.2058192491531372 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.18091699481010437, + "learning_rate": 1.6745699002457715e-05, + "loss": 0.1785, + "step": 3861, + "teacher_loss": 0.17825454473495483 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.16881585121154785, + "learning_rate": 1.675003614283649e-05, + "loss": 0.1912, + "step": 3862, + "teacher_loss": 0.1937391310930252 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.25088709592819214, + "learning_rate": 1.6754373283215267e-05, + "loss": 0.2879, + "step": 3863, + "teacher_loss": 0.29205167293548584 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.41045036911964417, + "learning_rate": 1.675871042359404e-05, + "loss": 0.2649, + "step": 3864, + "teacher_loss": 0.24874994158744812 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.12417268753051758, + "learning_rate": 1.676304756397282e-05, + "loss": 0.1968, + "step": 3865, + "teacher_loss": 0.20484042167663574 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.2121150940656662, + "learning_rate": 1.6767384704351597e-05, + "loss": 0.2203, + "step": 3866, + "teacher_loss": 0.22119294106960297 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.9646523594856262, + "learning_rate": 1.6771721844730374e-05, + "loss": 0.2763, + "step": 3867, + "teacher_loss": 0.1998457908630371 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.3803498148918152, + "learning_rate": 1.6776058985109152e-05, + "loss": 0.2534, + "step": 3868, + "teacher_loss": 0.23930513858795166 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.7884403467178345, + "learning_rate": 1.678039612548793e-05, + "loss": 0.3291, + "step": 3869, + "teacher_loss": 0.27804964780807495 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.3104196786880493, + "learning_rate": 1.6784733265866707e-05, + "loss": 0.2356, + "step": 3870, + "teacher_loss": 0.2272539734840393 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.1442958265542984, + "learning_rate": 1.6789070406245485e-05, + "loss": 0.2038, + "step": 3871, + "teacher_loss": 0.21043211221694946 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.5211657285690308, + "learning_rate": 1.6793407546624263e-05, + "loss": 0.2457, + "step": 3872, + "teacher_loss": 0.21509389579296112 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.413181871175766, + "learning_rate": 1.6797744687003033e-05, + "loss": 0.3, + "step": 3873, + "teacher_loss": 0.28739339113235474 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.69828200340271, + "learning_rate": 1.680208182738181e-05, + "loss": 0.3273, + "step": 3874, + "teacher_loss": 0.286125510931015 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.3417855203151703, + "learning_rate": 1.680641896776059e-05, + "loss": 0.2462, + "step": 3875, + "teacher_loss": 0.23559951782226562 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.19873076677322388, + "learning_rate": 1.6810756108139366e-05, + "loss": 0.1843, + "step": 3876, + "teacher_loss": 0.18274015188217163 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.44638437032699585, + "learning_rate": 1.6815093248518144e-05, + "loss": 0.1962, + "step": 3877, + "teacher_loss": 0.1683947741985321 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.25737982988357544, + "learning_rate": 1.681943038889692e-05, + "loss": 0.2638, + "step": 3878, + "teacher_loss": 0.26449745893478394 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.4711773991584778, + "learning_rate": 1.68237675292757e-05, + "loss": 0.2916, + "step": 3879, + "teacher_loss": 0.2716251313686371 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.3741251230239868, + "learning_rate": 1.6828104669654477e-05, + "loss": 0.2083, + "step": 3880, + "teacher_loss": 0.18984462320804596 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.41397538781166077, + "learning_rate": 1.6832441810033254e-05, + "loss": 0.2108, + "step": 3881, + "teacher_loss": 0.1881740689277649 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.3928160071372986, + "learning_rate": 1.683677895041203e-05, + "loss": 0.232, + "step": 3882, + "teacher_loss": 0.21408069133758545 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.6389734148979187, + "learning_rate": 1.6841116090790806e-05, + "loss": 0.2732, + "step": 3883, + "teacher_loss": 0.2325562685728073 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.7795952558517456, + "learning_rate": 1.684545323116958e-05, + "loss": 0.3268, + "step": 3884, + "teacher_loss": 0.27649807929992676 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.6031370162963867, + "learning_rate": 1.6849790371548358e-05, + "loss": 0.294, + "step": 3885, + "teacher_loss": 0.259596586227417 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.5631281137466431, + "learning_rate": 1.6854127511927136e-05, + "loss": 0.2587, + "step": 3886, + "teacher_loss": 0.22484168410301208 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.6453170776367188, + "learning_rate": 1.6858464652305913e-05, + "loss": 0.2936, + "step": 3887, + "teacher_loss": 0.25451862812042236 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.17686045169830322, + "learning_rate": 1.686280179268469e-05, + "loss": 0.2089, + "step": 3888, + "teacher_loss": 0.21243956685066223 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.5445213913917542, + "learning_rate": 1.686713893306347e-05, + "loss": 0.3213, + "step": 3889, + "teacher_loss": 0.29651808738708496 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.6258115768432617, + "learning_rate": 1.6871476073442246e-05, + "loss": 0.2448, + "step": 3890, + "teacher_loss": 0.2024383544921875 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.18060241639614105, + "learning_rate": 1.687581321382102e-05, + "loss": 0.2, + "step": 3891, + "teacher_loss": 0.20210134983062744 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.5958251953125, + "learning_rate": 1.6880150354199798e-05, + "loss": 0.24, + "step": 3892, + "teacher_loss": 0.20042569935321808 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.5163853168487549, + "learning_rate": 1.6884487494578576e-05, + "loss": 0.2714, + "step": 3893, + "teacher_loss": 0.2442055642604828 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.8214160203933716, + "learning_rate": 1.6888824634957353e-05, + "loss": 0.3049, + "step": 3894, + "teacher_loss": 0.24752146005630493 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.3378959894180298, + "learning_rate": 1.6893161775336127e-05, + "loss": 0.1907, + "step": 3895, + "teacher_loss": 0.17437592148780823 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.5107637047767639, + "learning_rate": 1.6897498915714905e-05, + "loss": 0.2255, + "step": 3896, + "teacher_loss": 0.19381266832351685 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.44685596227645874, + "learning_rate": 1.6901836056093683e-05, + "loss": 0.2186, + "step": 3897, + "teacher_loss": 0.1932271122932434 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.22791126370429993, + "learning_rate": 1.690617319647246e-05, + "loss": 0.301, + "step": 3898, + "teacher_loss": 0.3091421127319336 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.8121433854103088, + "learning_rate": 1.6910510336851238e-05, + "loss": 0.2013, + "step": 3899, + "teacher_loss": 0.13345089554786682 + }, + { + "compression_loss": 0.0, + "epoch": 0.7, + "label_loss": 0.5818511247634888, + "learning_rate": 1.6914847477230012e-05, + "loss": 0.2323, + "step": 3900, + "teacher_loss": 0.19344978034496307 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.26719602942466736, + "learning_rate": 1.691918461760879e-05, + "loss": 0.2449, + "step": 3901, + "teacher_loss": 0.24246114492416382 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.5554804801940918, + "learning_rate": 1.6923521757987567e-05, + "loss": 0.2934, + "step": 3902, + "teacher_loss": 0.26430991291999817 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.3490039110183716, + "learning_rate": 1.6927858898366345e-05, + "loss": 0.2409, + "step": 3903, + "teacher_loss": 0.2288811206817627 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.40259408950805664, + "learning_rate": 1.6932196038745123e-05, + "loss": 0.2663, + "step": 3904, + "teacher_loss": 0.25113362073898315 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.2171630859375, + "learning_rate": 1.69365331791239e-05, + "loss": 0.3032, + "step": 3905, + "teacher_loss": 0.3127533495426178 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.6709685325622559, + "learning_rate": 1.6940870319502675e-05, + "loss": 0.224, + "step": 3906, + "teacher_loss": 0.17436185479164124 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.3068908452987671, + "learning_rate": 1.6945207459881452e-05, + "loss": 0.308, + "step": 3907, + "teacher_loss": 0.3080999255180359 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.44132161140441895, + "learning_rate": 1.6949544600260226e-05, + "loss": 0.2075, + "step": 3908, + "teacher_loss": 0.18150848150253296 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.6858330965042114, + "learning_rate": 1.6953881740639004e-05, + "loss": 0.3212, + "step": 3909, + "teacher_loss": 0.28069499135017395 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.48784172534942627, + "learning_rate": 1.695821888101778e-05, + "loss": 0.2352, + "step": 3910, + "teacher_loss": 0.20714910328388214 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.8955060243606567, + "learning_rate": 1.696255602139656e-05, + "loss": 0.2575, + "step": 3911, + "teacher_loss": 0.18657651543617249 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.4782736897468567, + "learning_rate": 1.6966893161775337e-05, + "loss": 0.287, + "step": 3912, + "teacher_loss": 0.26575610041618347 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.5863543152809143, + "learning_rate": 1.6971230302154115e-05, + "loss": 0.2682, + "step": 3913, + "teacher_loss": 0.23284326493740082 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.5234898924827576, + "learning_rate": 1.6975567442532892e-05, + "loss": 0.2985, + "step": 3914, + "teacher_loss": 0.2735252380371094 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.3895675539970398, + "learning_rate": 1.697990458291167e-05, + "loss": 0.2825, + "step": 3915, + "teacher_loss": 0.27055951952934265 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.4092468321323395, + "learning_rate": 1.6984241723290444e-05, + "loss": 0.3383, + "step": 3916, + "teacher_loss": 0.33041566610336304 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.6894538998603821, + "learning_rate": 1.6988578863669218e-05, + "loss": 0.3198, + "step": 3917, + "teacher_loss": 0.2786828875541687 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.5031434297561646, + "learning_rate": 1.6992916004047996e-05, + "loss": 0.1988, + "step": 3918, + "teacher_loss": 0.1650250256061554 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.40064242482185364, + "learning_rate": 1.6997253144426773e-05, + "loss": 0.2786, + "step": 3919, + "teacher_loss": 0.26506125926971436 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.554537296295166, + "learning_rate": 1.700159028480555e-05, + "loss": 0.3811, + "step": 3920, + "teacher_loss": 0.3617980182170868 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.5182766318321228, + "learning_rate": 1.700592742518433e-05, + "loss": 0.312, + "step": 3921, + "teacher_loss": 0.2890707552433014 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.13691446185112, + "learning_rate": 1.7010264565563106e-05, + "loss": 0.237, + "step": 3922, + "teacher_loss": 0.24807268381118774 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.09030114114284515, + "learning_rate": 1.7014601705941884e-05, + "loss": 0.1685, + "step": 3923, + "teacher_loss": 0.1771407276391983 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.33797401189804077, + "learning_rate": 1.701893884632066e-05, + "loss": 0.2142, + "step": 3924, + "teacher_loss": 0.20049187541007996 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 1.1636279821395874, + "learning_rate": 1.702327598669944e-05, + "loss": 0.3683, + "step": 3925, + "teacher_loss": 0.27993345260620117 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.4771818220615387, + "learning_rate": 1.7027613127078213e-05, + "loss": 0.2436, + "step": 3926, + "teacher_loss": 0.21764010190963745 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.5608323812484741, + "learning_rate": 1.7031950267456988e-05, + "loss": 0.2476, + "step": 3927, + "teacher_loss": 0.21278738975524902 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.5386644601821899, + "learning_rate": 1.7036287407835765e-05, + "loss": 0.2293, + "step": 3928, + "teacher_loss": 0.19491825997829437 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.30903929471969604, + "learning_rate": 1.7040624548214543e-05, + "loss": 0.2177, + "step": 3929, + "teacher_loss": 0.20751143991947174 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.42455366253852844, + "learning_rate": 1.704496168859332e-05, + "loss": 0.1805, + "step": 3930, + "teacher_loss": 0.15340778231620789 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.527298629283905, + "learning_rate": 1.7049298828972098e-05, + "loss": 0.2276, + "step": 3931, + "teacher_loss": 0.19428777694702148 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.2847594916820526, + "learning_rate": 1.7053635969350876e-05, + "loss": 0.2124, + "step": 3932, + "teacher_loss": 0.2043876051902771 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.5373703837394714, + "learning_rate": 1.7057973109729653e-05, + "loss": 0.2624, + "step": 3933, + "teacher_loss": 0.23188257217407227 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.4049030542373657, + "learning_rate": 1.706231025010843e-05, + "loss": 0.315, + "step": 3934, + "teacher_loss": 0.3050132989883423 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.4367401897907257, + "learning_rate": 1.7066647390487205e-05, + "loss": 0.2255, + "step": 3935, + "teacher_loss": 0.20203456282615662 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.2832879424095154, + "learning_rate": 1.7070984530865983e-05, + "loss": 0.221, + "step": 3936, + "teacher_loss": 0.21404916048049927 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.5835138559341431, + "learning_rate": 1.707532167124476e-05, + "loss": 0.2519, + "step": 3937, + "teacher_loss": 0.21507278084754944 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.644747257232666, + "learning_rate": 1.7079658811623535e-05, + "loss": 0.2807, + "step": 3938, + "teacher_loss": 0.24024377763271332 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.34050512313842773, + "learning_rate": 1.7083995952002312e-05, + "loss": 0.3425, + "step": 3939, + "teacher_loss": 0.34271591901779175 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.32466745376586914, + "learning_rate": 1.708833309238109e-05, + "loss": 0.1948, + "step": 3940, + "teacher_loss": 0.1804220974445343 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.45505404472351074, + "learning_rate": 1.7092670232759868e-05, + "loss": 0.3913, + "step": 3941, + "teacher_loss": 0.3842456042766571 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.49860769510269165, + "learning_rate": 1.7097007373138645e-05, + "loss": 0.2175, + "step": 3942, + "teacher_loss": 0.18628600239753723 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.3566739559173584, + "learning_rate": 1.7101344513517423e-05, + "loss": 0.1973, + "step": 3943, + "teacher_loss": 0.17964181303977966 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.5710713863372803, + "learning_rate": 1.7105681653896197e-05, + "loss": 0.2433, + "step": 3944, + "teacher_loss": 0.2068319469690323 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.6545693874359131, + "learning_rate": 1.7110018794274975e-05, + "loss": 0.3603, + "step": 3945, + "teacher_loss": 0.3275928497314453 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.1751236617565155, + "learning_rate": 1.7114355934653752e-05, + "loss": 0.2148, + "step": 3946, + "teacher_loss": 0.21917343139648438 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.3753398060798645, + "learning_rate": 1.711869307503253e-05, + "loss": 0.2433, + "step": 3947, + "teacher_loss": 0.2286595106124878 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.2913745641708374, + "learning_rate": 1.7123030215411308e-05, + "loss": 0.2641, + "step": 3948, + "teacher_loss": 0.2610868215560913 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.7679098844528198, + "learning_rate": 1.7127367355790082e-05, + "loss": 0.3244, + "step": 3949, + "teacher_loss": 0.2750801742076874 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.7221739292144775, + "learning_rate": 1.713170449616886e-05, + "loss": 0.437, + "step": 3950, + "teacher_loss": 0.4052681624889374 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.40543872117996216, + "learning_rate": 1.7136041636547637e-05, + "loss": 0.2897, + "step": 3951, + "teacher_loss": 0.2768423557281494 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.36040055751800537, + "learning_rate": 1.714037877692641e-05, + "loss": 0.2737, + "step": 3952, + "teacher_loss": 0.2640827000141144 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.32016587257385254, + "learning_rate": 1.714471591730519e-05, + "loss": 0.2422, + "step": 3953, + "teacher_loss": 0.23359136283397675 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.41428905725479126, + "learning_rate": 1.7149053057683967e-05, + "loss": 0.2175, + "step": 3954, + "teacher_loss": 0.19563668966293335 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.4965195059776306, + "learning_rate": 1.7153390198062744e-05, + "loss": 0.2791, + "step": 3955, + "teacher_loss": 0.2549506723880768 + }, + { + "compression_loss": 0.0, + "epoch": 0.71, + "label_loss": 0.11466330289840698, + "learning_rate": 1.7157727338441522e-05, + "loss": 0.1659, + "step": 3956, + "teacher_loss": 0.171542227268219 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.26543858647346497, + "learning_rate": 1.71620644788203e-05, + "loss": 0.181, + "step": 3957, + "teacher_loss": 0.17160111665725708 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.44704151153564453, + "learning_rate": 1.7166401619199077e-05, + "loss": 0.2407, + "step": 3958, + "teacher_loss": 0.21775957942008972 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.23912313580513, + "learning_rate": 1.7170738759577855e-05, + "loss": 0.1673, + "step": 3959, + "teacher_loss": 0.15928784012794495 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.5461708903312683, + "learning_rate": 1.717507589995663e-05, + "loss": 0.2266, + "step": 3960, + "teacher_loss": 0.19106240570545197 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.6859482526779175, + "learning_rate": 1.7179413040335403e-05, + "loss": 0.3065, + "step": 3961, + "teacher_loss": 0.2643158435821533 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.3557426929473877, + "learning_rate": 1.718375018071418e-05, + "loss": 0.1966, + "step": 3962, + "teacher_loss": 0.1788983941078186 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.2776501178741455, + "learning_rate": 1.718808732109296e-05, + "loss": 0.2024, + "step": 3963, + "teacher_loss": 0.19399815797805786 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.19211947917938232, + "learning_rate": 1.7192424461471736e-05, + "loss": 0.153, + "step": 3964, + "teacher_loss": 0.14860612154006958 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.7693924307823181, + "learning_rate": 1.7196761601850514e-05, + "loss": 0.2738, + "step": 3965, + "teacher_loss": 0.21872790157794952 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.5709037780761719, + "learning_rate": 1.720109874222929e-05, + "loss": 0.2598, + "step": 3966, + "teacher_loss": 0.22521045804023743 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.3617135286331177, + "learning_rate": 1.720543588260807e-05, + "loss": 0.1982, + "step": 3967, + "teacher_loss": 0.18006360530853271 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.5754263401031494, + "learning_rate": 1.7209773022986846e-05, + "loss": 0.3296, + "step": 3968, + "teacher_loss": 0.30225008726119995 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.3671974837779999, + "learning_rate": 1.7214110163365624e-05, + "loss": 0.2475, + "step": 3969, + "teacher_loss": 0.23424479365348816 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.7586290836334229, + "learning_rate": 1.72184473037444e-05, + "loss": 0.2548, + "step": 3970, + "teacher_loss": 0.1987859606742859 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.5211168527603149, + "learning_rate": 1.7222784444123173e-05, + "loss": 0.2696, + "step": 3971, + "teacher_loss": 0.241624116897583 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.19971995055675507, + "learning_rate": 1.722712158450195e-05, + "loss": 0.1787, + "step": 3972, + "teacher_loss": 0.1763882339000702 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.7566083669662476, + "learning_rate": 1.7231458724880728e-05, + "loss": 0.2395, + "step": 3973, + "teacher_loss": 0.18208879232406616 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.13347968459129333, + "learning_rate": 1.7235795865259505e-05, + "loss": 0.1625, + "step": 3974, + "teacher_loss": 0.16568827629089355 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.38059964776039124, + "learning_rate": 1.7240133005638283e-05, + "loss": 0.2691, + "step": 3975, + "teacher_loss": 0.2567032277584076 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.5140262246131897, + "learning_rate": 1.724447014601706e-05, + "loss": 0.2827, + "step": 3976, + "teacher_loss": 0.256994366645813 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.8959448337554932, + "learning_rate": 1.7248807286395838e-05, + "loss": 0.288, + "step": 3977, + "teacher_loss": 0.22042669355869293 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.20358942449092865, + "learning_rate": 1.7253144426774616e-05, + "loss": 0.1868, + "step": 3978, + "teacher_loss": 0.18489934504032135 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.4068831205368042, + "learning_rate": 1.725748156715339e-05, + "loss": 0.3297, + "step": 3979, + "teacher_loss": 0.32108765840530396 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.2756415605545044, + "learning_rate": 1.7261818707532168e-05, + "loss": 0.2931, + "step": 3980, + "teacher_loss": 0.2950747013092041 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.3574574291706085, + "learning_rate": 1.7266155847910945e-05, + "loss": 0.2402, + "step": 3981, + "teacher_loss": 0.22721371054649353 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.5514946579933167, + "learning_rate": 1.727049298828972e-05, + "loss": 0.2963, + "step": 3982, + "teacher_loss": 0.2679723799228668 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.8199466466903687, + "learning_rate": 1.7274830128668497e-05, + "loss": 0.2943, + "step": 3983, + "teacher_loss": 0.23588624596595764 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.3846074938774109, + "learning_rate": 1.7279167269047275e-05, + "loss": 0.3388, + "step": 3984, + "teacher_loss": 0.33373701572418213 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.729701578617096, + "learning_rate": 1.7283504409426052e-05, + "loss": 0.3053, + "step": 3985, + "teacher_loss": 0.2581731081008911 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.5193918943405151, + "learning_rate": 1.728784154980483e-05, + "loss": 0.2593, + "step": 3986, + "teacher_loss": 0.2303738296031952 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 1.0310242176055908, + "learning_rate": 1.7292178690183608e-05, + "loss": 0.3085, + "step": 3987, + "teacher_loss": 0.22822466492652893 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.33354344964027405, + "learning_rate": 1.7296515830562382e-05, + "loss": 0.2206, + "step": 3988, + "teacher_loss": 0.20810367166996002 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.38283175230026245, + "learning_rate": 1.730085297094116e-05, + "loss": 0.2679, + "step": 3989, + "teacher_loss": 0.25515997409820557 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.5748469233512878, + "learning_rate": 1.7305190111319937e-05, + "loss": 0.2861, + "step": 3990, + "teacher_loss": 0.25400805473327637 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.45908045768737793, + "learning_rate": 1.7309527251698715e-05, + "loss": 0.2959, + "step": 3991, + "teacher_loss": 0.27780085802078247 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.4164584279060364, + "learning_rate": 1.7313864392077492e-05, + "loss": 0.2771, + "step": 3992, + "teacher_loss": 0.2615690529346466 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.5435278415679932, + "learning_rate": 1.7318201532456267e-05, + "loss": 0.3015, + "step": 3993, + "teacher_loss": 0.27455759048461914 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.4714326858520508, + "learning_rate": 1.7322538672835044e-05, + "loss": 0.2458, + "step": 3994, + "teacher_loss": 0.22067826986312866 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.46516501903533936, + "learning_rate": 1.7326875813213822e-05, + "loss": 0.3276, + "step": 3995, + "teacher_loss": 0.3123038411140442 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.44686880707740784, + "learning_rate": 1.7331212953592596e-05, + "loss": 0.2469, + "step": 3996, + "teacher_loss": 0.22468581795692444 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.46535807847976685, + "learning_rate": 1.7335550093971374e-05, + "loss": 0.1952, + "step": 3997, + "teacher_loss": 0.16521766781806946 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.34706300497055054, + "learning_rate": 1.733988723435015e-05, + "loss": 0.2445, + "step": 3998, + "teacher_loss": 0.2331579178571701 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.3269864618778229, + "learning_rate": 1.734422437472893e-05, + "loss": 0.2136, + "step": 3999, + "teacher_loss": 0.2010076344013214 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.7116924524307251, + "learning_rate": 1.7348561515107707e-05, + "loss": 0.3018, + "step": 4000, + "teacher_loss": 0.2562645375728607 + }, + { + "epoch": 0.72, + "eval_exact_match": 79.75402081362347, + "eval_f1": 87.30717881985169, + "step": 4000 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.33770906925201416, + "learning_rate": 1.7352898655486484e-05, + "loss": 0.224, + "step": 4001, + "teacher_loss": 0.21131166815757751 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.5675424933433533, + "learning_rate": 1.7357235795865262e-05, + "loss": 0.1815, + "step": 4002, + "teacher_loss": 0.1386110782623291 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.7261936664581299, + "learning_rate": 1.736157293624404e-05, + "loss": 0.4138, + "step": 4003, + "teacher_loss": 0.379089891910553 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.39315661787986755, + "learning_rate": 1.7365910076622814e-05, + "loss": 0.3442, + "step": 4004, + "teacher_loss": 0.33879828453063965 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.4980563223361969, + "learning_rate": 1.7370247217001588e-05, + "loss": 0.2667, + "step": 4005, + "teacher_loss": 0.24095144867897034 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.22820442914962769, + "learning_rate": 1.7374584357380366e-05, + "loss": 0.1743, + "step": 4006, + "teacher_loss": 0.16832825541496277 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.7101324796676636, + "learning_rate": 1.7378921497759143e-05, + "loss": 0.2602, + "step": 4007, + "teacher_loss": 0.21025151014328003 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.22062525153160095, + "learning_rate": 1.738325863813792e-05, + "loss": 0.1651, + "step": 4008, + "teacher_loss": 0.15893635153770447 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.34375327825546265, + "learning_rate": 1.73875957785167e-05, + "loss": 0.2606, + "step": 4009, + "teacher_loss": 0.25138455629348755 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.260866641998291, + "learning_rate": 1.7391932918895476e-05, + "loss": 0.2097, + "step": 4010, + "teacher_loss": 0.20406612753868103 + }, + { + "compression_loss": 0.0, + "epoch": 0.72, + "label_loss": 0.24984990060329437, + "learning_rate": 1.7396270059274254e-05, + "loss": 0.2282, + "step": 4011, + "teacher_loss": 0.2258007973432541 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.9095983505249023, + "learning_rate": 1.740060719965303e-05, + "loss": 0.3237, + "step": 4012, + "teacher_loss": 0.25863415002822876 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.4446542263031006, + "learning_rate": 1.740494434003181e-05, + "loss": 0.2234, + "step": 4013, + "teacher_loss": 0.19883191585540771 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.28768840432167053, + "learning_rate": 1.7409281480410583e-05, + "loss": 0.1391, + "step": 4014, + "teacher_loss": 0.12255939096212387 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.27036651968955994, + "learning_rate": 1.7413618620789357e-05, + "loss": 0.2673, + "step": 4015, + "teacher_loss": 0.2669178545475006 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.35449501872062683, + "learning_rate": 1.7417955761168135e-05, + "loss": 0.2001, + "step": 4016, + "teacher_loss": 0.1829456090927124 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.7128984928131104, + "learning_rate": 1.7422292901546913e-05, + "loss": 0.288, + "step": 4017, + "teacher_loss": 0.24080851674079895 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.520004153251648, + "learning_rate": 1.742663004192569e-05, + "loss": 0.2954, + "step": 4018, + "teacher_loss": 0.2704889178276062 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.5462685227394104, + "learning_rate": 1.7430967182304468e-05, + "loss": 0.275, + "step": 4019, + "teacher_loss": 0.24483177065849304 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.7000532746315002, + "learning_rate": 1.7435304322683246e-05, + "loss": 0.3071, + "step": 4020, + "teacher_loss": 0.2634274959564209 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.8150086998939514, + "learning_rate": 1.7439641463062023e-05, + "loss": 0.3161, + "step": 4021, + "teacher_loss": 0.2606182098388672 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.7050122022628784, + "learning_rate": 1.74439786034408e-05, + "loss": 0.3248, + "step": 4022, + "teacher_loss": 0.28258079290390015 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.3940303921699524, + "learning_rate": 1.7448315743819575e-05, + "loss": 0.2612, + "step": 4023, + "teacher_loss": 0.24647334218025208 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.4136486053466797, + "learning_rate": 1.7452652884198353e-05, + "loss": 0.3057, + "step": 4024, + "teacher_loss": 0.2937406897544861 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.40887176990509033, + "learning_rate": 1.7456990024577127e-05, + "loss": 0.2973, + "step": 4025, + "teacher_loss": 0.28495338559150696 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.265727698802948, + "learning_rate": 1.7461327164955904e-05, + "loss": 0.2324, + "step": 4026, + "teacher_loss": 0.22871285676956177 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.3892470896244049, + "learning_rate": 1.7465664305334682e-05, + "loss": 0.232, + "step": 4027, + "teacher_loss": 0.2145436406135559 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.35933834314346313, + "learning_rate": 1.747000144571346e-05, + "loss": 0.3416, + "step": 4028, + "teacher_loss": 0.33963119983673096 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.6742483377456665, + "learning_rate": 1.7474338586092237e-05, + "loss": 0.2296, + "step": 4029, + "teacher_loss": 0.18020948767662048 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.2300499528646469, + "learning_rate": 1.7478675726471015e-05, + "loss": 0.3051, + "step": 4030, + "teacher_loss": 0.3134298324584961 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.16709429025650024, + "learning_rate": 1.7483012866849793e-05, + "loss": 0.2203, + "step": 4031, + "teacher_loss": 0.22620849311351776 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.3943938612937927, + "learning_rate": 1.7487350007228567e-05, + "loss": 0.1997, + "step": 4032, + "teacher_loss": 0.1780540645122528 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.5219937562942505, + "learning_rate": 1.7491687147607344e-05, + "loss": 0.2249, + "step": 4033, + "teacher_loss": 0.19186696410179138 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 1.0082002878189087, + "learning_rate": 1.7496024287986122e-05, + "loss": 0.356, + "step": 4034, + "teacher_loss": 0.2835494875907898 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.372824490070343, + "learning_rate": 1.75003614283649e-05, + "loss": 0.2724, + "step": 4035, + "teacher_loss": 0.26122957468032837 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.803097665309906, + "learning_rate": 1.7504698568743674e-05, + "loss": 0.4271, + "step": 4036, + "teacher_loss": 0.3852939307689667 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.30804795026779175, + "learning_rate": 1.750903570912245e-05, + "loss": 0.1848, + "step": 4037, + "teacher_loss": 0.17106834053993225 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.22174109518527985, + "learning_rate": 1.751337284950123e-05, + "loss": 0.2254, + "step": 4038, + "teacher_loss": 0.22582527995109558 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.2759023904800415, + "learning_rate": 1.7517709989880007e-05, + "loss": 0.2374, + "step": 4039, + "teacher_loss": 0.23315656185150146 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.23789766430854797, + "learning_rate": 1.7522047130258784e-05, + "loss": 0.2512, + "step": 4040, + "teacher_loss": 0.2526342272758484 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.5043667554855347, + "learning_rate": 1.752638427063756e-05, + "loss": 0.1902, + "step": 4041, + "teacher_loss": 0.15527993440628052 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.4522740840911865, + "learning_rate": 1.7530721411016336e-05, + "loss": 0.2334, + "step": 4042, + "teacher_loss": 0.20903506875038147 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.2933364808559418, + "learning_rate": 1.7535058551395114e-05, + "loss": 0.2601, + "step": 4043, + "teacher_loss": 0.25644904375076294 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.3178500235080719, + "learning_rate": 1.753939569177389e-05, + "loss": 0.2282, + "step": 4044, + "teacher_loss": 0.21819926798343658 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.3540683090686798, + "learning_rate": 1.754373283215267e-05, + "loss": 0.2301, + "step": 4045, + "teacher_loss": 0.21635138988494873 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.6484888792037964, + "learning_rate": 1.7548069972531447e-05, + "loss": 0.3186, + "step": 4046, + "teacher_loss": 0.28195860981941223 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.3333244323730469, + "learning_rate": 1.755240711291022e-05, + "loss": 0.1922, + "step": 4047, + "teacher_loss": 0.17654263973236084 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.6256155371665955, + "learning_rate": 1.7556744253289e-05, + "loss": 0.2163, + "step": 4048, + "teacher_loss": 0.1708238422870636 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.5275435447692871, + "learning_rate": 1.7561081393667773e-05, + "loss": 0.3682, + "step": 4049, + "teacher_loss": 0.3505405783653259 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.334852397441864, + "learning_rate": 1.756541853404655e-05, + "loss": 0.2029, + "step": 4050, + "teacher_loss": 0.1882268488407135 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.834480881690979, + "learning_rate": 1.7569755674425328e-05, + "loss": 0.3045, + "step": 4051, + "teacher_loss": 0.24565812945365906 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.9154723286628723, + "learning_rate": 1.7574092814804106e-05, + "loss": 0.3502, + "step": 4052, + "teacher_loss": 0.28736215829849243 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.2546464204788208, + "learning_rate": 1.7578429955182883e-05, + "loss": 0.2324, + "step": 4053, + "teacher_loss": 0.22994284331798553 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.2698632478713989, + "learning_rate": 1.758276709556166e-05, + "loss": 0.2088, + "step": 4054, + "teacher_loss": 0.20201200246810913 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.43870335817337036, + "learning_rate": 1.758710423594044e-05, + "loss": 0.3067, + "step": 4055, + "teacher_loss": 0.2920305132865906 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.5013998746871948, + "learning_rate": 1.7591441376319216e-05, + "loss": 0.3039, + "step": 4056, + "teacher_loss": 0.28196966648101807 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.36489903926849365, + "learning_rate": 1.7595778516697994e-05, + "loss": 0.2677, + "step": 4057, + "teacher_loss": 0.25687360763549805 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.3993161916732788, + "learning_rate": 1.7600115657076765e-05, + "loss": 0.3249, + "step": 4058, + "teacher_loss": 0.31662583351135254 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.356062114238739, + "learning_rate": 1.7604452797455542e-05, + "loss": 0.2502, + "step": 4059, + "teacher_loss": 0.23846256732940674 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.9452967643737793, + "learning_rate": 1.760878993783432e-05, + "loss": 0.2715, + "step": 4060, + "teacher_loss": 0.19665464758872986 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.46517735719680786, + "learning_rate": 1.7613127078213098e-05, + "loss": 0.2347, + "step": 4061, + "teacher_loss": 0.20907726883888245 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 1.092389702796936, + "learning_rate": 1.7617464218591875e-05, + "loss": 0.3699, + "step": 4062, + "teacher_loss": 0.28963568806648254 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.409944087266922, + "learning_rate": 1.7621801358970653e-05, + "loss": 0.2182, + "step": 4063, + "teacher_loss": 0.19692330062389374 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.46763208508491516, + "learning_rate": 1.762613849934943e-05, + "loss": 0.2412, + "step": 4064, + "teacher_loss": 0.2160019725561142 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.22305750846862793, + "learning_rate": 1.7630475639728208e-05, + "loss": 0.2236, + "step": 4065, + "teacher_loss": 0.22365880012512207 + }, + { + "compression_loss": 0.0, + "epoch": 0.73, + "label_loss": 0.4423673152923584, + "learning_rate": 1.7634812780106986e-05, + "loss": 0.2448, + "step": 4066, + "teacher_loss": 0.22282114624977112 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.37302684783935547, + "learning_rate": 1.763914992048576e-05, + "loss": 0.2066, + "step": 4067, + "teacher_loss": 0.18813541531562805 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.5116618871688843, + "learning_rate": 1.7643487060864538e-05, + "loss": 0.3366, + "step": 4068, + "teacher_loss": 0.31709977984428406 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.36667102575302124, + "learning_rate": 1.7647824201243312e-05, + "loss": 0.2133, + "step": 4069, + "teacher_loss": 0.19627505540847778 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.3286817669868469, + "learning_rate": 1.765216134162209e-05, + "loss": 0.2192, + "step": 4070, + "teacher_loss": 0.20701350271701813 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.6520019769668579, + "learning_rate": 1.7656498482000867e-05, + "loss": 0.2232, + "step": 4071, + "teacher_loss": 0.17553207278251648 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.6495643854141235, + "learning_rate": 1.7660835622379645e-05, + "loss": 0.2921, + "step": 4072, + "teacher_loss": 0.25240421295166016 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.4447469413280487, + "learning_rate": 1.7665172762758422e-05, + "loss": 0.1859, + "step": 4073, + "teacher_loss": 0.15714462101459503 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.4054219126701355, + "learning_rate": 1.76695099031372e-05, + "loss": 0.217, + "step": 4074, + "teacher_loss": 0.19610343873500824 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.2489941418170929, + "learning_rate": 1.7673847043515977e-05, + "loss": 0.196, + "step": 4075, + "teacher_loss": 0.19012480974197388 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.7123292684555054, + "learning_rate": 1.7678184183894752e-05, + "loss": 0.3415, + "step": 4076, + "teacher_loss": 0.3002733290195465 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.517052173614502, + "learning_rate": 1.768252132427353e-05, + "loss": 0.3496, + "step": 4077, + "teacher_loss": 0.330968976020813 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.6957550048828125, + "learning_rate": 1.7686858464652307e-05, + "loss": 0.5273, + "step": 4078, + "teacher_loss": 0.5085822343826294 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.707707405090332, + "learning_rate": 1.7691195605031085e-05, + "loss": 0.3184, + "step": 4079, + "teacher_loss": 0.2751516103744507 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.22795212268829346, + "learning_rate": 1.769553274540986e-05, + "loss": 0.162, + "step": 4080, + "teacher_loss": 0.15472549200057983 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.40044939517974854, + "learning_rate": 1.7699869885788636e-05, + "loss": 0.2142, + "step": 4081, + "teacher_loss": 0.19347555935382843 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.31254029273986816, + "learning_rate": 1.7704207026167414e-05, + "loss": 0.1956, + "step": 4082, + "teacher_loss": 0.18261130154132843 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.9501831531524658, + "learning_rate": 1.770854416654619e-05, + "loss": 0.2485, + "step": 4083, + "teacher_loss": 0.17052507400512695 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.5576575994491577, + "learning_rate": 1.771288130692497e-05, + "loss": 0.386, + "step": 4084, + "teacher_loss": 0.36688530445098877 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.24088335037231445, + "learning_rate": 1.7717218447303744e-05, + "loss": 0.1493, + "step": 4085, + "teacher_loss": 0.1391381025314331 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.42695024609565735, + "learning_rate": 1.772155558768252e-05, + "loss": 0.1858, + "step": 4086, + "teacher_loss": 0.15901176631450653 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.40484851598739624, + "learning_rate": 1.77258927280613e-05, + "loss": 0.1983, + "step": 4087, + "teacher_loss": 0.17537343502044678 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.4246392846107483, + "learning_rate": 1.7730229868440076e-05, + "loss": 0.2343, + "step": 4088, + "teacher_loss": 0.21316301822662354 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.2727547883987427, + "learning_rate": 1.7734567008818854e-05, + "loss": 0.2921, + "step": 4089, + "teacher_loss": 0.2942776083946228 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.6109724640846252, + "learning_rate": 1.773890414919763e-05, + "loss": 0.237, + "step": 4090, + "teacher_loss": 0.1954127997159958 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.33399826288223267, + "learning_rate": 1.7743241289576406e-05, + "loss": 0.2703, + "step": 4091, + "teacher_loss": 0.2632274925708771 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.5302850008010864, + "learning_rate": 1.7747578429955184e-05, + "loss": 0.2868, + "step": 4092, + "teacher_loss": 0.25974148511886597 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.6457614302635193, + "learning_rate": 1.7751915570333958e-05, + "loss": 0.2318, + "step": 4093, + "teacher_loss": 0.18582448363304138 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.5607349872589111, + "learning_rate": 1.7756252710712735e-05, + "loss": 0.2212, + "step": 4094, + "teacher_loss": 0.18351811170578003 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.6887947916984558, + "learning_rate": 1.7760589851091513e-05, + "loss": 0.4157, + "step": 4095, + "teacher_loss": 0.3854042589664459 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.189820796251297, + "learning_rate": 1.776492699147029e-05, + "loss": 0.2697, + "step": 4096, + "teacher_loss": 0.2785395383834839 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.2410605251789093, + "learning_rate": 1.7769264131849068e-05, + "loss": 0.2587, + "step": 4097, + "teacher_loss": 0.2607034146785736 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.19291429221630096, + "learning_rate": 1.7773601272227846e-05, + "loss": 0.1876, + "step": 4098, + "teacher_loss": 0.18701593577861786 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.3723694682121277, + "learning_rate": 1.7777938412606623e-05, + "loss": 0.2649, + "step": 4099, + "teacher_loss": 0.2529558837413788 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.6752137541770935, + "learning_rate": 1.77822755529854e-05, + "loss": 0.3135, + "step": 4100, + "teacher_loss": 0.2733563184738159 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.17171594500541687, + "learning_rate": 1.778661269336418e-05, + "loss": 0.1699, + "step": 4101, + "teacher_loss": 0.1696939766407013 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.6029078960418701, + "learning_rate": 1.779094983374295e-05, + "loss": 0.2709, + "step": 4102, + "teacher_loss": 0.23398740589618683 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.503454327583313, + "learning_rate": 1.7795286974121727e-05, + "loss": 0.3285, + "step": 4103, + "teacher_loss": 0.3090372085571289 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.6840324401855469, + "learning_rate": 1.7799624114500505e-05, + "loss": 0.2289, + "step": 4104, + "teacher_loss": 0.1783227175474167 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.7135021686553955, + "learning_rate": 1.7803961254879282e-05, + "loss": 0.2781, + "step": 4105, + "teacher_loss": 0.22974857687950134 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.7254519462585449, + "learning_rate": 1.780829839525806e-05, + "loss": 0.2955, + "step": 4106, + "teacher_loss": 0.24771904945373535 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.7794185280799866, + "learning_rate": 1.7812635535636838e-05, + "loss": 0.2183, + "step": 4107, + "teacher_loss": 0.15597784519195557 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.4867413640022278, + "learning_rate": 1.7816972676015615e-05, + "loss": 0.2977, + "step": 4108, + "teacher_loss": 0.2767234146595001 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.3026319444179535, + "learning_rate": 1.7821309816394393e-05, + "loss": 0.1936, + "step": 4109, + "teacher_loss": 0.18148526549339294 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.6156463623046875, + "learning_rate": 1.782564695677317e-05, + "loss": 0.3303, + "step": 4110, + "teacher_loss": 0.2986099421977997 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.4075435996055603, + "learning_rate": 1.7829984097151945e-05, + "loss": 0.2092, + "step": 4111, + "teacher_loss": 0.1871265470981598 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.1646701693534851, + "learning_rate": 1.7834321237530722e-05, + "loss": 0.2901, + "step": 4112, + "teacher_loss": 0.3040759563446045 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.9276775121688843, + "learning_rate": 1.7838658377909497e-05, + "loss": 0.332, + "step": 4113, + "teacher_loss": 0.2658011317253113 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.23450957238674164, + "learning_rate": 1.7842995518288274e-05, + "loss": 0.2282, + "step": 4114, + "teacher_loss": 0.22748714685440063 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.3063355088233948, + "learning_rate": 1.7847332658667052e-05, + "loss": 0.2024, + "step": 4115, + "teacher_loss": 0.19082927703857422 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.6012687087059021, + "learning_rate": 1.785166979904583e-05, + "loss": 0.3315, + "step": 4116, + "teacher_loss": 0.3015064597129822 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.29128801822662354, + "learning_rate": 1.7856006939424607e-05, + "loss": 0.2198, + "step": 4117, + "teacher_loss": 0.21189172565937042 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.3383934199810028, + "learning_rate": 1.7860344079803385e-05, + "loss": 0.2403, + "step": 4118, + "teacher_loss": 0.2294088900089264 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.27994000911712646, + "learning_rate": 1.7864681220182162e-05, + "loss": 0.2066, + "step": 4119, + "teacher_loss": 0.19841763377189636 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.21849367022514343, + "learning_rate": 1.7869018360560937e-05, + "loss": 0.1768, + "step": 4120, + "teacher_loss": 0.17214488983154297 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.5366904735565186, + "learning_rate": 1.7873355500939714e-05, + "loss": 0.2914, + "step": 4121, + "teacher_loss": 0.26419180631637573 + }, + { + "compression_loss": 0.0, + "epoch": 0.74, + "label_loss": 0.6963645219802856, + "learning_rate": 1.7877692641318492e-05, + "loss": 0.2736, + "step": 4122, + "teacher_loss": 0.22660967707633972 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.5423459410667419, + "learning_rate": 1.7882029781697266e-05, + "loss": 0.3279, + "step": 4123, + "teacher_loss": 0.30402815341949463 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.5324422717094421, + "learning_rate": 1.7886366922076044e-05, + "loss": 0.3405, + "step": 4124, + "teacher_loss": 0.3191503882408142 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.646681547164917, + "learning_rate": 1.789070406245482e-05, + "loss": 0.2399, + "step": 4125, + "teacher_loss": 0.19468306005001068 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.29867154359817505, + "learning_rate": 1.78950412028336e-05, + "loss": 0.1801, + "step": 4126, + "teacher_loss": 0.16696974635124207 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.9584167003631592, + "learning_rate": 1.7899378343212377e-05, + "loss": 0.352, + "step": 4127, + "teacher_loss": 0.2845892906188965 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.18704788386821747, + "learning_rate": 1.7903715483591154e-05, + "loss": 0.174, + "step": 4128, + "teacher_loss": 0.1725316345691681 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.311404287815094, + "learning_rate": 1.790805262396993e-05, + "loss": 0.2225, + "step": 4129, + "teacher_loss": 0.21261435747146606 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.42546606063842773, + "learning_rate": 1.7912389764348706e-05, + "loss": 0.1985, + "step": 4130, + "teacher_loss": 0.17323258519172668 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.32190102338790894, + "learning_rate": 1.7916726904727484e-05, + "loss": 0.2371, + "step": 4131, + "teacher_loss": 0.22767257690429688 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.4125993251800537, + "learning_rate": 1.792106404510626e-05, + "loss": 0.2652, + "step": 4132, + "teacher_loss": 0.24884022772312164 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.528472900390625, + "learning_rate": 1.792540118548504e-05, + "loss": 0.2224, + "step": 4133, + "teacher_loss": 0.18836882710456848 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.2622644901275635, + "learning_rate": 1.7929738325863813e-05, + "loss": 0.27, + "step": 4134, + "teacher_loss": 0.2708420753479004 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.46562889218330383, + "learning_rate": 1.793407546624259e-05, + "loss": 0.2739, + "step": 4135, + "teacher_loss": 0.25260233879089355 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.3772982060909271, + "learning_rate": 1.793841260662137e-05, + "loss": 0.2383, + "step": 4136, + "teacher_loss": 0.2228030562400818 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.6222391128540039, + "learning_rate": 1.7942749747000143e-05, + "loss": 0.374, + "step": 4137, + "teacher_loss": 0.34645402431488037 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.1699390858411789, + "learning_rate": 1.794708688737892e-05, + "loss": 0.1548, + "step": 4138, + "teacher_loss": 0.15308451652526855 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.5850667953491211, + "learning_rate": 1.7951424027757698e-05, + "loss": 0.3584, + "step": 4139, + "teacher_loss": 0.3332614302635193 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 1.1333680152893066, + "learning_rate": 1.7955761168136475e-05, + "loss": 0.3943, + "step": 4140, + "teacher_loss": 0.3121543526649475 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.32772552967071533, + "learning_rate": 1.7960098308515253e-05, + "loss": 0.2606, + "step": 4141, + "teacher_loss": 0.2531731128692627 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.2984614074230194, + "learning_rate": 1.796443544889403e-05, + "loss": 0.2214, + "step": 4142, + "teacher_loss": 0.21281316876411438 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.5480458736419678, + "learning_rate": 1.796877258927281e-05, + "loss": 0.2742, + "step": 4143, + "teacher_loss": 0.24381019175052643 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.7649442553520203, + "learning_rate": 1.7973109729651586e-05, + "loss": 0.2611, + "step": 4144, + "teacher_loss": 0.20515291392803192 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.616247296333313, + "learning_rate": 1.797744687003036e-05, + "loss": 0.2843, + "step": 4145, + "teacher_loss": 0.24743354320526123 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.3074440360069275, + "learning_rate": 1.7981784010409134e-05, + "loss": 0.2713, + "step": 4146, + "teacher_loss": 0.2672742009162903 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.4931604266166687, + "learning_rate": 1.7986121150787912e-05, + "loss": 0.3286, + "step": 4147, + "teacher_loss": 0.3102909326553345 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.7713298201560974, + "learning_rate": 1.799045829116669e-05, + "loss": 0.3562, + "step": 4148, + "teacher_loss": 0.31005924940109253 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.6208812594413757, + "learning_rate": 1.7994795431545467e-05, + "loss": 0.3075, + "step": 4149, + "teacher_loss": 0.2726662755012512 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.5932831764221191, + "learning_rate": 1.7999132571924245e-05, + "loss": 0.3224, + "step": 4150, + "teacher_loss": 0.2922472059726715 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.41951775550842285, + "learning_rate": 1.8003469712303023e-05, + "loss": 0.2359, + "step": 4151, + "teacher_loss": 0.21553251147270203 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.3235367238521576, + "learning_rate": 1.80078068526818e-05, + "loss": 0.1936, + "step": 4152, + "teacher_loss": 0.17914780974388123 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.6134922504425049, + "learning_rate": 1.8012143993060578e-05, + "loss": 0.3397, + "step": 4153, + "teacher_loss": 0.30927425622940063 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.9352678060531616, + "learning_rate": 1.8016481133439355e-05, + "loss": 0.3284, + "step": 4154, + "teacher_loss": 0.2609473466873169 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.6744242906570435, + "learning_rate": 1.802081827381813e-05, + "loss": 0.6153, + "step": 4155, + "teacher_loss": 0.608716607093811 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.2243228554725647, + "learning_rate": 1.8025155414196904e-05, + "loss": 0.1874, + "step": 4156, + "teacher_loss": 0.18333221971988678 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.8162073493003845, + "learning_rate": 1.802949255457568e-05, + "loss": 0.2931, + "step": 4157, + "teacher_loss": 0.2349727600812912 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.7589199542999268, + "learning_rate": 1.803382969495446e-05, + "loss": 0.6153, + "step": 4158, + "teacher_loss": 0.5993785858154297 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.7566730380058289, + "learning_rate": 1.8038166835333237e-05, + "loss": 0.3598, + "step": 4159, + "teacher_loss": 0.3157259225845337 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.4488690197467804, + "learning_rate": 1.8042503975712014e-05, + "loss": 0.2876, + "step": 4160, + "teacher_loss": 0.2697228789329529 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.3223251402378082, + "learning_rate": 1.8046841116090792e-05, + "loss": 0.184, + "step": 4161, + "teacher_loss": 0.16859588027000427 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.6922459602355957, + "learning_rate": 1.805117825646957e-05, + "loss": 0.2506, + "step": 4162, + "teacher_loss": 0.2015790194272995 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.3889782726764679, + "learning_rate": 1.8055515396848347e-05, + "loss": 0.2218, + "step": 4163, + "teacher_loss": 0.20322087407112122 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.4009304642677307, + "learning_rate": 1.805985253722712e-05, + "loss": 0.2259, + "step": 4164, + "teacher_loss": 0.20640188455581665 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.5789691805839539, + "learning_rate": 1.80641896776059e-05, + "loss": 0.2399, + "step": 4165, + "teacher_loss": 0.20227554440498352 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.3846268653869629, + "learning_rate": 1.8068526817984677e-05, + "loss": 0.2227, + "step": 4166, + "teacher_loss": 0.20473584532737732 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.750187873840332, + "learning_rate": 1.807286395836345e-05, + "loss": 0.3149, + "step": 4167, + "teacher_loss": 0.26654112339019775 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.3824586272239685, + "learning_rate": 1.807720109874223e-05, + "loss": 0.2905, + "step": 4168, + "teacher_loss": 0.28028547763824463 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.432822048664093, + "learning_rate": 1.8081538239121006e-05, + "loss": 0.1926, + "step": 4169, + "teacher_loss": 0.1659039705991745 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.6052897572517395, + "learning_rate": 1.8085875379499784e-05, + "loss": 0.4069, + "step": 4170, + "teacher_loss": 0.38488519191741943 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.41225871443748474, + "learning_rate": 1.809021251987856e-05, + "loss": 0.224, + "step": 4171, + "teacher_loss": 0.2030371129512787 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.4907906651496887, + "learning_rate": 1.809454966025734e-05, + "loss": 0.3597, + "step": 4172, + "teacher_loss": 0.3451845645904541 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.45901599526405334, + "learning_rate": 1.8098886800636113e-05, + "loss": 0.1972, + "step": 4173, + "teacher_loss": 0.16811567544937134 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.36044102907180786, + "learning_rate": 1.810322394101489e-05, + "loss": 0.2454, + "step": 4174, + "teacher_loss": 0.23265400528907776 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.7801012992858887, + "learning_rate": 1.810756108139367e-05, + "loss": 0.3481, + "step": 4175, + "teacher_loss": 0.3000839054584503 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.7795083522796631, + "learning_rate": 1.8111898221772446e-05, + "loss": 0.2888, + "step": 4176, + "teacher_loss": 0.23425626754760742 + }, + { + "compression_loss": 0.0, + "epoch": 0.75, + "label_loss": 0.4698697626590729, + "learning_rate": 1.8116235362151224e-05, + "loss": 0.2827, + "step": 4177, + "teacher_loss": 0.2618792653083801 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 1.0276384353637695, + "learning_rate": 1.8120572502529998e-05, + "loss": 0.4081, + "step": 4178, + "teacher_loss": 0.33921635150909424 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.44592103362083435, + "learning_rate": 1.8124909642908776e-05, + "loss": 0.1803, + "step": 4179, + "teacher_loss": 0.1507425308227539 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.48160549998283386, + "learning_rate": 1.8129246783287553e-05, + "loss": 0.2409, + "step": 4180, + "teacher_loss": 0.21416831016540527 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.6662040948867798, + "learning_rate": 1.813358392366633e-05, + "loss": 0.3531, + "step": 4181, + "teacher_loss": 0.3183550238609314 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.35104310512542725, + "learning_rate": 1.8137921064045105e-05, + "loss": 0.2688, + "step": 4182, + "teacher_loss": 0.2596573233604431 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.48213931918144226, + "learning_rate": 1.8142258204423883e-05, + "loss": 0.2757, + "step": 4183, + "teacher_loss": 0.25270798802375793 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.845737636089325, + "learning_rate": 1.814659534480266e-05, + "loss": 0.2799, + "step": 4184, + "teacher_loss": 0.21700571477413177 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.34580889344215393, + "learning_rate": 1.8150932485181438e-05, + "loss": 0.1925, + "step": 4185, + "teacher_loss": 0.17543494701385498 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.4003676176071167, + "learning_rate": 1.8155269625560216e-05, + "loss": 0.2833, + "step": 4186, + "teacher_loss": 0.2702893614768982 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.7470386028289795, + "learning_rate": 1.8159606765938993e-05, + "loss": 0.3804, + "step": 4187, + "teacher_loss": 0.33962613344192505 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.3823215365409851, + "learning_rate": 1.816394390631777e-05, + "loss": 0.2035, + "step": 4188, + "teacher_loss": 0.18361711502075195 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.4612460434436798, + "learning_rate": 1.8168281046696545e-05, + "loss": 0.2919, + "step": 4189, + "teacher_loss": 0.2730613350868225 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.46669965982437134, + "learning_rate": 1.817261818707532e-05, + "loss": 0.3236, + "step": 4190, + "teacher_loss": 0.3077358901500702 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.5682482719421387, + "learning_rate": 1.8176955327454097e-05, + "loss": 0.3003, + "step": 4191, + "teacher_loss": 0.27056723833084106 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.3474375605583191, + "learning_rate": 1.8181292467832875e-05, + "loss": 0.2011, + "step": 4192, + "teacher_loss": 0.18484455347061157 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.5370516777038574, + "learning_rate": 1.8185629608211652e-05, + "loss": 0.3064, + "step": 4193, + "teacher_loss": 0.28077375888824463 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.2800413966178894, + "learning_rate": 1.818996674859043e-05, + "loss": 0.2229, + "step": 4194, + "teacher_loss": 0.21660469472408295 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.5470927357673645, + "learning_rate": 1.8194303888969207e-05, + "loss": 0.2936, + "step": 4195, + "teacher_loss": 0.26544079184532166 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.4033074676990509, + "learning_rate": 1.8198641029347985e-05, + "loss": 0.2495, + "step": 4196, + "teacher_loss": 0.2324121594429016 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.46007657051086426, + "learning_rate": 1.8202978169726763e-05, + "loss": 0.2429, + "step": 4197, + "teacher_loss": 0.21881017088890076 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.40594303607940674, + "learning_rate": 1.820731531010554e-05, + "loss": 0.2359, + "step": 4198, + "teacher_loss": 0.21699343621730804 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.21994228661060333, + "learning_rate": 1.8211652450484315e-05, + "loss": 0.1583, + "step": 4199, + "teacher_loss": 0.15144497156143188 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.7465957403182983, + "learning_rate": 1.821598959086309e-05, + "loss": 0.3495, + "step": 4200, + "teacher_loss": 0.30540865659713745 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.2482997477054596, + "learning_rate": 1.8220326731241866e-05, + "loss": 0.1527, + "step": 4201, + "teacher_loss": 0.14203517138957977 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.15918280184268951, + "learning_rate": 1.8224663871620644e-05, + "loss": 0.1606, + "step": 4202, + "teacher_loss": 0.16076231002807617 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.17935830354690552, + "learning_rate": 1.822900101199942e-05, + "loss": 0.1941, + "step": 4203, + "teacher_loss": 0.19575420022010803 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.5091278553009033, + "learning_rate": 1.82333381523782e-05, + "loss": 0.3381, + "step": 4204, + "teacher_loss": 0.31904828548431396 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.2724432051181793, + "learning_rate": 1.8237675292756977e-05, + "loss": 0.2053, + "step": 4205, + "teacher_loss": 0.1978590488433838 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.5350421071052551, + "learning_rate": 1.8242012433135754e-05, + "loss": 0.2348, + "step": 4206, + "teacher_loss": 0.20147663354873657 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.3698226511478424, + "learning_rate": 1.8246349573514532e-05, + "loss": 0.1819, + "step": 4207, + "teacher_loss": 0.1610046923160553 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.1945790797472, + "learning_rate": 1.8250686713893306e-05, + "loss": 0.138, + "step": 4208, + "teacher_loss": 0.1317148506641388 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.5742756128311157, + "learning_rate": 1.8255023854272084e-05, + "loss": 0.269, + "step": 4209, + "teacher_loss": 0.23512838780879974 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.7809941172599792, + "learning_rate": 1.825936099465086e-05, + "loss": 0.252, + "step": 4210, + "teacher_loss": 0.1931881308555603 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.4742361903190613, + "learning_rate": 1.8263698135029636e-05, + "loss": 0.243, + "step": 4211, + "teacher_loss": 0.2172536700963974 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.6045116186141968, + "learning_rate": 1.8268035275408413e-05, + "loss": 0.2794, + "step": 4212, + "teacher_loss": 0.24326679110527039 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.9009673595428467, + "learning_rate": 1.827237241578719e-05, + "loss": 0.3543, + "step": 4213, + "teacher_loss": 0.29360055923461914 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 1.0596833229064941, + "learning_rate": 1.827670955616597e-05, + "loss": 0.3608, + "step": 4214, + "teacher_loss": 0.28312447667121887 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.5905895233154297, + "learning_rate": 1.8281046696544746e-05, + "loss": 0.2753, + "step": 4215, + "teacher_loss": 0.24023553729057312 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.33441784977912903, + "learning_rate": 1.8285383836923524e-05, + "loss": 0.211, + "step": 4216, + "teacher_loss": 0.19732201099395752 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.3466756045818329, + "learning_rate": 1.8289720977302298e-05, + "loss": 0.2018, + "step": 4217, + "teacher_loss": 0.18565401434898376 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.46034321188926697, + "learning_rate": 1.8294058117681076e-05, + "loss": 0.1828, + "step": 4218, + "teacher_loss": 0.15200430154800415 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.24992728233337402, + "learning_rate": 1.8298395258059853e-05, + "loss": 0.1572, + "step": 4219, + "teacher_loss": 0.1468605101108551 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.30746909976005554, + "learning_rate": 1.830273239843863e-05, + "loss": 0.3255, + "step": 4220, + "teacher_loss": 0.32745373249053955 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.1922600120306015, + "learning_rate": 1.8307069538817405e-05, + "loss": 0.2117, + "step": 4221, + "teacher_loss": 0.21388307213783264 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.46663790941238403, + "learning_rate": 1.8311406679196183e-05, + "loss": 0.3957, + "step": 4222, + "teacher_loss": 0.3878590762615204 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.46185216307640076, + "learning_rate": 1.831574381957496e-05, + "loss": 0.2853, + "step": 4223, + "teacher_loss": 0.26571404933929443 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.5915626883506775, + "learning_rate": 1.8320080959953738e-05, + "loss": 0.3798, + "step": 4224, + "teacher_loss": 0.3563128113746643 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 1.1518059968948364, + "learning_rate": 1.8324418100332516e-05, + "loss": 0.7159, + "step": 4225, + "teacher_loss": 0.6674721240997314 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.5562552809715271, + "learning_rate": 1.832875524071129e-05, + "loss": 0.3238, + "step": 4226, + "teacher_loss": 0.2979458272457123 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.20705249905586243, + "learning_rate": 1.8333092381090068e-05, + "loss": 0.1829, + "step": 4227, + "teacher_loss": 0.1801748275756836 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.6600640416145325, + "learning_rate": 1.8337429521468845e-05, + "loss": 0.3064, + "step": 4228, + "teacher_loss": 0.2671135663986206 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.3625275790691376, + "learning_rate": 1.8341766661847623e-05, + "loss": 0.2495, + "step": 4229, + "teacher_loss": 0.23694270849227905 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.38866570591926575, + "learning_rate": 1.83461038022264e-05, + "loss": 0.2127, + "step": 4230, + "teacher_loss": 0.19310730695724487 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.3258657157421112, + "learning_rate": 1.8350440942605178e-05, + "loss": 0.2647, + "step": 4231, + "teacher_loss": 0.25786906480789185 + }, + { + "compression_loss": 0.0, + "epoch": 0.76, + "label_loss": 0.582978367805481, + "learning_rate": 1.8354778082983952e-05, + "loss": 0.303, + "step": 4232, + "teacher_loss": 0.2718997299671173 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.4374690651893616, + "learning_rate": 1.835911522336273e-05, + "loss": 0.2043, + "step": 4233, + "teacher_loss": 0.17836229503154755 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.5784834027290344, + "learning_rate": 1.8363452363741504e-05, + "loss": 0.2032, + "step": 4234, + "teacher_loss": 0.1615065187215805 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.7325087189674377, + "learning_rate": 1.8367789504120282e-05, + "loss": 0.2505, + "step": 4235, + "teacher_loss": 0.19689740240573883 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 1.058279037475586, + "learning_rate": 1.837212664449906e-05, + "loss": 0.5036, + "step": 4236, + "teacher_loss": 0.44200754165649414 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.5671568512916565, + "learning_rate": 1.8376463784877837e-05, + "loss": 0.2633, + "step": 4237, + "teacher_loss": 0.22953970730304718 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.7244428396224976, + "learning_rate": 1.8380800925256615e-05, + "loss": 0.2668, + "step": 4238, + "teacher_loss": 0.21591055393218994 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.7140688300132751, + "learning_rate": 1.8385138065635392e-05, + "loss": 0.2976, + "step": 4239, + "teacher_loss": 0.25131598114967346 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 1.1110929250717163, + "learning_rate": 1.838947520601417e-05, + "loss": 0.3138, + "step": 4240, + "teacher_loss": 0.2252422571182251 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.3740127980709076, + "learning_rate": 1.8393812346392948e-05, + "loss": 0.2078, + "step": 4241, + "teacher_loss": 0.18930116295814514 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.6471554040908813, + "learning_rate": 1.8398149486771725e-05, + "loss": 0.4757, + "step": 4242, + "teacher_loss": 0.45665058493614197 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.3991910219192505, + "learning_rate": 1.8402486627150496e-05, + "loss": 0.1933, + "step": 4243, + "teacher_loss": 0.17043735086917877 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.10748796164989471, + "learning_rate": 1.8406823767529274e-05, + "loss": 0.1068, + "step": 4244, + "teacher_loss": 0.1067567691206932 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.1807546615600586, + "learning_rate": 1.841116090790805e-05, + "loss": 0.2771, + "step": 4245, + "teacher_loss": 0.28783339262008667 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.3415837585926056, + "learning_rate": 1.841549804828683e-05, + "loss": 0.2782, + "step": 4246, + "teacher_loss": 0.2711701989173889 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.26681897044181824, + "learning_rate": 1.8419835188665607e-05, + "loss": 0.2756, + "step": 4247, + "teacher_loss": 0.27655911445617676 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.4825130105018616, + "learning_rate": 1.8424172329044384e-05, + "loss": 0.2137, + "step": 4248, + "teacher_loss": 0.18379493057727814 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.4611910581588745, + "learning_rate": 1.8428509469423162e-05, + "loss": 0.2992, + "step": 4249, + "teacher_loss": 0.28123822808265686 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.29843151569366455, + "learning_rate": 1.843284660980194e-05, + "loss": 0.1978, + "step": 4250, + "teacher_loss": 0.18664315342903137 + }, + { + "epoch": 0.77, + "eval_exact_match": 79.70671712393566, + "eval_f1": 87.20175780173301, + "step": 4250 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.8695374727249146, + "learning_rate": 1.8437183750180717e-05, + "loss": 0.2438, + "step": 4251, + "teacher_loss": 0.17428690195083618 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.32610562443733215, + "learning_rate": 1.844152089055949e-05, + "loss": 0.2326, + "step": 4252, + "teacher_loss": 0.2221592217683792 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.3297593295574188, + "learning_rate": 1.844585803093827e-05, + "loss": 0.2234, + "step": 4253, + "teacher_loss": 0.2115522027015686 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.8076539039611816, + "learning_rate": 1.8450195171317043e-05, + "loss": 0.346, + "step": 4254, + "teacher_loss": 0.2947551906108856 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.20926666259765625, + "learning_rate": 1.845453231169582e-05, + "loss": 0.2008, + "step": 4255, + "teacher_loss": 0.1998097151517868 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.2916312515735626, + "learning_rate": 1.84588694520746e-05, + "loss": 0.2409, + "step": 4256, + "teacher_loss": 0.2352708876132965 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.5025413036346436, + "learning_rate": 1.8463206592453376e-05, + "loss": 0.2446, + "step": 4257, + "teacher_loss": 0.21592681109905243 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.1686260998249054, + "learning_rate": 1.8467543732832154e-05, + "loss": 0.1563, + "step": 4258, + "teacher_loss": 0.1549108773469925 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.5989721417427063, + "learning_rate": 1.847188087321093e-05, + "loss": 0.2573, + "step": 4259, + "teacher_loss": 0.21933308243751526 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.8840031623840332, + "learning_rate": 1.847621801358971e-05, + "loss": 0.2707, + "step": 4260, + "teacher_loss": 0.2025284469127655 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.400000661611557, + "learning_rate": 1.8480555153968483e-05, + "loss": 0.2763, + "step": 4261, + "teacher_loss": 0.2625943422317505 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.803421676158905, + "learning_rate": 1.848489229434726e-05, + "loss": 0.4833, + "step": 4262, + "teacher_loss": 0.4477643370628357 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.24858370423316956, + "learning_rate": 1.8489229434726038e-05, + "loss": 0.1734, + "step": 4263, + "teacher_loss": 0.16503044962882996 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.4687172770500183, + "learning_rate": 1.8493566575104816e-05, + "loss": 0.2631, + "step": 4264, + "teacher_loss": 0.240285724401474 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.33411869406700134, + "learning_rate": 1.849790371548359e-05, + "loss": 0.2899, + "step": 4265, + "teacher_loss": 0.28500452637672424 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.19061389565467834, + "learning_rate": 1.8502240855862368e-05, + "loss": 0.1804, + "step": 4266, + "teacher_loss": 0.17924764752388 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.4153069853782654, + "learning_rate": 1.8506577996241145e-05, + "loss": 0.3503, + "step": 4267, + "teacher_loss": 0.34306925535202026 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.42755013704299927, + "learning_rate": 1.8510915136619923e-05, + "loss": 0.2727, + "step": 4268, + "teacher_loss": 0.2555101811885834 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.39209532737731934, + "learning_rate": 1.85152522769987e-05, + "loss": 0.1872, + "step": 4269, + "teacher_loss": 0.16439926624298096 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.46185368299484253, + "learning_rate": 1.8519589417377475e-05, + "loss": 0.2132, + "step": 4270, + "teacher_loss": 0.18558794260025024 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.36124539375305176, + "learning_rate": 1.8523926557756252e-05, + "loss": 0.26, + "step": 4271, + "teacher_loss": 0.2487795650959015 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.3830479383468628, + "learning_rate": 1.852826369813503e-05, + "loss": 0.2743, + "step": 4272, + "teacher_loss": 0.2621828317642212 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.28602516651153564, + "learning_rate": 1.8532600838513808e-05, + "loss": 0.1981, + "step": 4273, + "teacher_loss": 0.18838083744049072 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.46078264713287354, + "learning_rate": 1.8536937978892585e-05, + "loss": 0.287, + "step": 4274, + "teacher_loss": 0.26765817403793335 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.6124197244644165, + "learning_rate": 1.8541275119271363e-05, + "loss": 0.2696, + "step": 4275, + "teacher_loss": 0.23152336478233337 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.655608057975769, + "learning_rate": 1.8545612259650137e-05, + "loss": 0.3377, + "step": 4276, + "teacher_loss": 0.30232250690460205 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.8387855887413025, + "learning_rate": 1.8549949400028915e-05, + "loss": 0.2682, + "step": 4277, + "teacher_loss": 0.2047930657863617 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.2550361454486847, + "learning_rate": 1.855428654040769e-05, + "loss": 0.2114, + "step": 4278, + "teacher_loss": 0.206498384475708 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.6667413711547852, + "learning_rate": 1.8558623680786467e-05, + "loss": 0.3013, + "step": 4279, + "teacher_loss": 0.26074278354644775 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.46464070677757263, + "learning_rate": 1.8562960821165244e-05, + "loss": 0.2526, + "step": 4280, + "teacher_loss": 0.2290804088115692 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.4909101724624634, + "learning_rate": 1.8567297961544022e-05, + "loss": 0.2234, + "step": 4281, + "teacher_loss": 0.19365081191062927 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.7285306453704834, + "learning_rate": 1.85716351019228e-05, + "loss": 0.2986, + "step": 4282, + "teacher_loss": 0.2508789300918579 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.1851373016834259, + "learning_rate": 1.8575972242301577e-05, + "loss": 0.2098, + "step": 4283, + "teacher_loss": 0.21254321932792664 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.505715012550354, + "learning_rate": 1.8580309382680355e-05, + "loss": 0.2558, + "step": 4284, + "teacher_loss": 0.22800683975219727 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.7456421256065369, + "learning_rate": 1.8584646523059132e-05, + "loss": 0.4703, + "step": 4285, + "teacher_loss": 0.43973881006240845 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.40645524859428406, + "learning_rate": 1.858898366343791e-05, + "loss": 0.2197, + "step": 4286, + "teacher_loss": 0.19894936680793762 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.6293581128120422, + "learning_rate": 1.859332080381668e-05, + "loss": 0.6297, + "step": 4287, + "teacher_loss": 0.629779577255249 + }, + { + "compression_loss": 0.0, + "epoch": 0.77, + "label_loss": 0.6857258677482605, + "learning_rate": 1.859765794419546e-05, + "loss": 0.328, + "step": 4288, + "teacher_loss": 0.2882636785507202 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.510424017906189, + "learning_rate": 1.8601995084574236e-05, + "loss": 0.2482, + "step": 4289, + "teacher_loss": 0.21901285648345947 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.611987292766571, + "learning_rate": 1.8606332224953014e-05, + "loss": 0.2844, + "step": 4290, + "teacher_loss": 0.24800626933574677 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.4525676369667053, + "learning_rate": 1.861066936533179e-05, + "loss": 0.2028, + "step": 4291, + "teacher_loss": 0.17501947283744812 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.47354036569595337, + "learning_rate": 1.861500650571057e-05, + "loss": 0.3284, + "step": 4292, + "teacher_loss": 0.3122839629650116 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.213621586561203, + "learning_rate": 1.8619343646089347e-05, + "loss": 0.2245, + "step": 4293, + "teacher_loss": 0.22573347389698029 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.6952092051506042, + "learning_rate": 1.8623680786468124e-05, + "loss": 0.3585, + "step": 4294, + "teacher_loss": 0.32106664776802063 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.18327361345291138, + "learning_rate": 1.8628017926846902e-05, + "loss": 0.1669, + "step": 4295, + "teacher_loss": 0.16503292322158813 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.4421996772289276, + "learning_rate": 1.8632355067225676e-05, + "loss": 0.2507, + "step": 4296, + "teacher_loss": 0.2294618785381317 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.5327869653701782, + "learning_rate": 1.8636692207604454e-05, + "loss": 0.2367, + "step": 4297, + "teacher_loss": 0.2037869691848755 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.40128207206726074, + "learning_rate": 1.8641029347983228e-05, + "loss": 0.1956, + "step": 4298, + "teacher_loss": 0.17278623580932617 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.2973548173904419, + "learning_rate": 1.8645366488362006e-05, + "loss": 0.2575, + "step": 4299, + "teacher_loss": 0.25308579206466675 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.4530125558376312, + "learning_rate": 1.8649703628740783e-05, + "loss": 0.2296, + "step": 4300, + "teacher_loss": 0.20473836362361908 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.2388342022895813, + "learning_rate": 1.865404076911956e-05, + "loss": 0.1925, + "step": 4301, + "teacher_loss": 0.187351256608963 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.368000864982605, + "learning_rate": 1.865837790949834e-05, + "loss": 0.2046, + "step": 4302, + "teacher_loss": 0.18639764189720154 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.6789907217025757, + "learning_rate": 1.8662715049877116e-05, + "loss": 0.2883, + "step": 4303, + "teacher_loss": 0.24485091865062714 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.7069035768508911, + "learning_rate": 1.8667052190255894e-05, + "loss": 0.3865, + "step": 4304, + "teacher_loss": 0.3508761525154114 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.3162829577922821, + "learning_rate": 1.8671389330634668e-05, + "loss": 0.1527, + "step": 4305, + "teacher_loss": 0.13452647626399994 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.2658165693283081, + "learning_rate": 1.8675726471013446e-05, + "loss": 0.2238, + "step": 4306, + "teacher_loss": 0.21911801397800446 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.8025497198104858, + "learning_rate": 1.8680063611392223e-05, + "loss": 0.3371, + "step": 4307, + "teacher_loss": 0.2854340076446533 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.3647902011871338, + "learning_rate": 1.8684400751771e-05, + "loss": 0.2645, + "step": 4308, + "teacher_loss": 0.25339561700820923 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.4634060859680176, + "learning_rate": 1.8688737892149775e-05, + "loss": 0.2643, + "step": 4309, + "teacher_loss": 0.24221907556056976 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.5004192590713501, + "learning_rate": 1.8693075032528553e-05, + "loss": 0.2147, + "step": 4310, + "teacher_loss": 0.18291622400283813 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.5047429203987122, + "learning_rate": 1.869741217290733e-05, + "loss": 0.2629, + "step": 4311, + "teacher_loss": 0.23603002727031708 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.29406580328941345, + "learning_rate": 1.8701749313286108e-05, + "loss": 0.201, + "step": 4312, + "teacher_loss": 0.1907017081975937 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.22989502549171448, + "learning_rate": 1.8706086453664886e-05, + "loss": 0.2088, + "step": 4313, + "teacher_loss": 0.20644724369049072 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.5856242179870605, + "learning_rate": 1.871042359404366e-05, + "loss": 0.2436, + "step": 4314, + "teacher_loss": 0.20557467639446259 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.7974061369895935, + "learning_rate": 1.8714760734422437e-05, + "loss": 0.5391, + "step": 4315, + "teacher_loss": 0.5103491544723511 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.3375333547592163, + "learning_rate": 1.8719097874801215e-05, + "loss": 0.2046, + "step": 4316, + "teacher_loss": 0.18983879685401917 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.4497114419937134, + "learning_rate": 1.8723435015179993e-05, + "loss": 0.3025, + "step": 4317, + "teacher_loss": 0.28611230850219727 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.37641435861587524, + "learning_rate": 1.872777215555877e-05, + "loss": 0.4076, + "step": 4318, + "teacher_loss": 0.4110143780708313 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.38564983010292053, + "learning_rate": 1.8732109295937548e-05, + "loss": 0.2584, + "step": 4319, + "teacher_loss": 0.24423296749591827 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.9457871913909912, + "learning_rate": 1.8736446436316322e-05, + "loss": 0.4431, + "step": 4320, + "teacher_loss": 0.38722193241119385 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.19923396408557892, + "learning_rate": 1.87407835766951e-05, + "loss": 0.1703, + "step": 4321, + "teacher_loss": 0.16704685986042023 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.4149247407913208, + "learning_rate": 1.8745120717073877e-05, + "loss": 0.2437, + "step": 4322, + "teacher_loss": 0.2247181087732315 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.2385530173778534, + "learning_rate": 1.874945785745265e-05, + "loss": 0.2707, + "step": 4323, + "teacher_loss": 0.2742440700531006 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.2685689330101013, + "learning_rate": 1.875379499783143e-05, + "loss": 0.1763, + "step": 4324, + "teacher_loss": 0.16601906716823578 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.3340357840061188, + "learning_rate": 1.8758132138210207e-05, + "loss": 0.2406, + "step": 4325, + "teacher_loss": 0.2302560806274414 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.2699044942855835, + "learning_rate": 1.8762469278588984e-05, + "loss": 0.2055, + "step": 4326, + "teacher_loss": 0.1983424872159958 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.11705869436264038, + "learning_rate": 1.8766806418967762e-05, + "loss": 0.1649, + "step": 4327, + "teacher_loss": 0.17017850279808044 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.45480579137802124, + "learning_rate": 1.877114355934654e-05, + "loss": 0.2887, + "step": 4328, + "teacher_loss": 0.27021324634552 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.6701047420501709, + "learning_rate": 1.8775480699725317e-05, + "loss": 0.2893, + "step": 4329, + "teacher_loss": 0.24704372882843018 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.4310171902179718, + "learning_rate": 1.877981784010409e-05, + "loss": 0.2244, + "step": 4330, + "teacher_loss": 0.20142894983291626 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.4295814037322998, + "learning_rate": 1.8784154980482866e-05, + "loss": 0.2449, + "step": 4331, + "teacher_loss": 0.22436144948005676 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.1813291609287262, + "learning_rate": 1.8788492120861643e-05, + "loss": 0.1505, + "step": 4332, + "teacher_loss": 0.14709439873695374 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.5309654474258423, + "learning_rate": 1.879282926124042e-05, + "loss": 0.2564, + "step": 4333, + "teacher_loss": 0.22584135830402374 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.3411603569984436, + "learning_rate": 1.87971664016192e-05, + "loss": 0.2055, + "step": 4334, + "teacher_loss": 0.19046136736869812 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.5316437482833862, + "learning_rate": 1.8801503541997976e-05, + "loss": 0.3266, + "step": 4335, + "teacher_loss": 0.3037664294242859 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.5856420993804932, + "learning_rate": 1.8805840682376754e-05, + "loss": 0.2513, + "step": 4336, + "teacher_loss": 0.21410749852657318 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.12336384505033493, + "learning_rate": 1.881017782275553e-05, + "loss": 0.2165, + "step": 4337, + "teacher_loss": 0.22685931622982025 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.4408440589904785, + "learning_rate": 1.881451496313431e-05, + "loss": 0.1913, + "step": 4338, + "teacher_loss": 0.16360166668891907 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.31678009033203125, + "learning_rate": 1.8818852103513087e-05, + "loss": 0.1868, + "step": 4339, + "teacher_loss": 0.17239277064800262 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.3320227265357971, + "learning_rate": 1.882318924389186e-05, + "loss": 0.2661, + "step": 4340, + "teacher_loss": 0.25879985094070435 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.6310441493988037, + "learning_rate": 1.8827526384270635e-05, + "loss": 0.2476, + "step": 4341, + "teacher_loss": 0.2049868404865265 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.4937819838523865, + "learning_rate": 1.8831863524649413e-05, + "loss": 0.3782, + "step": 4342, + "teacher_loss": 0.36540526151657104 + }, + { + "compression_loss": 0.0, + "epoch": 0.78, + "label_loss": 0.3894301950931549, + "learning_rate": 1.883620066502819e-05, + "loss": 0.3566, + "step": 4343, + "teacher_loss": 0.35293471813201904 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.7206357717514038, + "learning_rate": 1.8840537805406968e-05, + "loss": 0.3451, + "step": 4344, + "teacher_loss": 0.3033825159072876 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.26766490936279297, + "learning_rate": 1.8844874945785746e-05, + "loss": 0.1951, + "step": 4345, + "teacher_loss": 0.18700119853019714 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.4767746031284332, + "learning_rate": 1.8849212086164523e-05, + "loss": 0.2943, + "step": 4346, + "teacher_loss": 0.2739824652671814 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.45957350730895996, + "learning_rate": 1.88535492265433e-05, + "loss": 0.2478, + "step": 4347, + "teacher_loss": 0.22431239485740662 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.5060524344444275, + "learning_rate": 1.885788636692208e-05, + "loss": 0.4291, + "step": 4348, + "teacher_loss": 0.42052415013313293 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.3664916157722473, + "learning_rate": 1.8862223507300853e-05, + "loss": 0.1967, + "step": 4349, + "teacher_loss": 0.1778573989868164 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.16738006472587585, + "learning_rate": 1.886656064767963e-05, + "loss": 0.1769, + "step": 4350, + "teacher_loss": 0.1779191941022873 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.4389292001724243, + "learning_rate": 1.8870897788058408e-05, + "loss": 0.3306, + "step": 4351, + "teacher_loss": 0.31855449080467224 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.18302121758460999, + "learning_rate": 1.8875234928437182e-05, + "loss": 0.1943, + "step": 4352, + "teacher_loss": 0.1955508589744568 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.2137041985988617, + "learning_rate": 1.887957206881596e-05, + "loss": 0.2202, + "step": 4353, + "teacher_loss": 0.22086820006370544 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.31997427344322205, + "learning_rate": 1.8883909209194738e-05, + "loss": 0.1451, + "step": 4354, + "teacher_loss": 0.12569984793663025 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.4039191007614136, + "learning_rate": 1.8888246349573515e-05, + "loss": 0.1827, + "step": 4355, + "teacher_loss": 0.15808013081550598 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.24972745776176453, + "learning_rate": 1.8892583489952293e-05, + "loss": 0.2618, + "step": 4356, + "teacher_loss": 0.2631451487541199 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.5144719481468201, + "learning_rate": 1.889692063033107e-05, + "loss": 0.21, + "step": 4357, + "teacher_loss": 0.17612047493457794 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.37136709690093994, + "learning_rate": 1.8901257770709845e-05, + "loss": 0.2455, + "step": 4358, + "teacher_loss": 0.2315484881401062 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.37611523270606995, + "learning_rate": 1.8905594911088622e-05, + "loss": 0.2167, + "step": 4359, + "teacher_loss": 0.19899940490722656 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.1258644163608551, + "learning_rate": 1.89099320514674e-05, + "loss": 0.1974, + "step": 4360, + "teacher_loss": 0.2053266167640686 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 1.2536430358886719, + "learning_rate": 1.8914269191846178e-05, + "loss": 0.3545, + "step": 4361, + "teacher_loss": 0.2545698583126068 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.4635392427444458, + "learning_rate": 1.8918606332224955e-05, + "loss": 0.2167, + "step": 4362, + "teacher_loss": 0.18924319744110107 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.20551873743534088, + "learning_rate": 1.892294347260373e-05, + "loss": 0.2025, + "step": 4363, + "teacher_loss": 0.20219628512859344 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 1.0362813472747803, + "learning_rate": 1.8927280612982507e-05, + "loss": 0.375, + "step": 4364, + "teacher_loss": 0.30153319239616394 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.6008142828941345, + "learning_rate": 1.8931617753361285e-05, + "loss": 0.2931, + "step": 4365, + "teacher_loss": 0.2588798403739929 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.4625106453895569, + "learning_rate": 1.8935954893740062e-05, + "loss": 0.3069, + "step": 4366, + "teacher_loss": 0.2896609306335449 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.1407613456249237, + "learning_rate": 1.8940292034118836e-05, + "loss": 0.2725, + "step": 4367, + "teacher_loss": 0.2870877981185913 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.10765865445137024, + "learning_rate": 1.8944629174497614e-05, + "loss": 0.17, + "step": 4368, + "teacher_loss": 0.17694024741649628 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.32489126920700073, + "learning_rate": 1.8948966314876392e-05, + "loss": 0.1743, + "step": 4369, + "teacher_loss": 0.15759655833244324 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.9479628205299377, + "learning_rate": 1.895330345525517e-05, + "loss": 0.309, + "step": 4370, + "teacher_loss": 0.23800595104694366 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.3562701940536499, + "learning_rate": 1.8957640595633947e-05, + "loss": 0.2658, + "step": 4371, + "teacher_loss": 0.25572866201400757 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.7417806386947632, + "learning_rate": 1.8961977736012725e-05, + "loss": 0.2852, + "step": 4372, + "teacher_loss": 0.23442819714546204 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.28376078605651855, + "learning_rate": 1.8966314876391502e-05, + "loss": 0.1882, + "step": 4373, + "teacher_loss": 0.1775858998298645 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.2423812448978424, + "learning_rate": 1.8970652016770276e-05, + "loss": 0.2022, + "step": 4374, + "teacher_loss": 0.19768668711185455 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.6269419193267822, + "learning_rate": 1.897498915714905e-05, + "loss": 0.2639, + "step": 4375, + "teacher_loss": 0.2235143482685089 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.9645397663116455, + "learning_rate": 1.8979326297527828e-05, + "loss": 0.3536, + "step": 4376, + "teacher_loss": 0.285714715719223 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.4486308693885803, + "learning_rate": 1.8983663437906606e-05, + "loss": 0.2232, + "step": 4377, + "teacher_loss": 0.19817957282066345 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.7429558634757996, + "learning_rate": 1.8988000578285384e-05, + "loss": 0.3039, + "step": 4378, + "teacher_loss": 0.2551349997520447 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.46867793798446655, + "learning_rate": 1.899233771866416e-05, + "loss": 0.3604, + "step": 4379, + "teacher_loss": 0.34834498167037964 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.35948532819747925, + "learning_rate": 1.899667485904294e-05, + "loss": 0.2138, + "step": 4380, + "teacher_loss": 0.1976063847541809 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.5096275806427002, + "learning_rate": 1.9001011999421716e-05, + "loss": 0.2612, + "step": 4381, + "teacher_loss": 0.23363693058490753 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.2687079906463623, + "learning_rate": 1.9005349139800494e-05, + "loss": 0.2021, + "step": 4382, + "teacher_loss": 0.19468240439891815 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.4174654185771942, + "learning_rate": 1.900968628017927e-05, + "loss": 0.2269, + "step": 4383, + "teacher_loss": 0.20571528375148773 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.34681329131126404, + "learning_rate": 1.9014023420558046e-05, + "loss": 0.1947, + "step": 4384, + "teacher_loss": 0.17775966227054596 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.47667616605758667, + "learning_rate": 1.901836056093682e-05, + "loss": 0.3132, + "step": 4385, + "teacher_loss": 0.2949827015399933 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.5764468908309937, + "learning_rate": 1.9022697701315598e-05, + "loss": 0.2507, + "step": 4386, + "teacher_loss": 0.2144799530506134 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.20920279622077942, + "learning_rate": 1.9027034841694375e-05, + "loss": 0.217, + "step": 4387, + "teacher_loss": 0.21788394451141357 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.4802161455154419, + "learning_rate": 1.9031371982073153e-05, + "loss": 0.3171, + "step": 4388, + "teacher_loss": 0.2989689111709595 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.3222218155860901, + "learning_rate": 1.903570912245193e-05, + "loss": 0.245, + "step": 4389, + "teacher_loss": 0.2364031821489334 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.692144513130188, + "learning_rate": 1.9040046262830708e-05, + "loss": 0.3058, + "step": 4390, + "teacher_loss": 0.26290571689605713 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.49712514877319336, + "learning_rate": 1.9044383403209486e-05, + "loss": 0.289, + "step": 4391, + "teacher_loss": 0.26592397689819336 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.6742779016494751, + "learning_rate": 1.9048720543588263e-05, + "loss": 0.3311, + "step": 4392, + "teacher_loss": 0.292987585067749 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.18801485002040863, + "learning_rate": 1.9053057683967038e-05, + "loss": 0.2078, + "step": 4393, + "teacher_loss": 0.2099807858467102 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.5727881789207458, + "learning_rate": 1.9057394824345815e-05, + "loss": 0.2674, + "step": 4394, + "teacher_loss": 0.23346683382987976 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.41732239723205566, + "learning_rate": 1.9061731964724593e-05, + "loss": 0.2591, + "step": 4395, + "teacher_loss": 0.2415492683649063 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.5799175500869751, + "learning_rate": 1.9066069105103367e-05, + "loss": 0.3929, + "step": 4396, + "teacher_loss": 0.37212449312210083 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.27822983264923096, + "learning_rate": 1.9070406245482145e-05, + "loss": 0.2241, + "step": 4397, + "teacher_loss": 0.21813462674617767 + }, + { + "compression_loss": 0.0, + "epoch": 0.79, + "label_loss": 0.47497743368148804, + "learning_rate": 1.9074743385860922e-05, + "loss": 0.221, + "step": 4398, + "teacher_loss": 0.19275131821632385 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.46600785851478577, + "learning_rate": 1.90790805262397e-05, + "loss": 0.2605, + "step": 4399, + "teacher_loss": 0.23764127492904663 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.5026023983955383, + "learning_rate": 1.9083417666618478e-05, + "loss": 0.2039, + "step": 4400, + "teacher_loss": 0.1706666499376297 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.3185422718524933, + "learning_rate": 1.9087754806997255e-05, + "loss": 0.1971, + "step": 4401, + "teacher_loss": 0.183591827750206 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.17662450671195984, + "learning_rate": 1.909209194737603e-05, + "loss": 0.1764, + "step": 4402, + "teacher_loss": 0.17642992734909058 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.6701233983039856, + "learning_rate": 1.9096429087754807e-05, + "loss": 0.2296, + "step": 4403, + "teacher_loss": 0.18062585592269897 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.3591124415397644, + "learning_rate": 1.9100766228133585e-05, + "loss": 0.2458, + "step": 4404, + "teacher_loss": 0.23325462639331818 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.3126135468482971, + "learning_rate": 1.9105103368512362e-05, + "loss": 0.2359, + "step": 4405, + "teacher_loss": 0.22732990980148315 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.606193482875824, + "learning_rate": 1.910944050889114e-05, + "loss": 0.3175, + "step": 4406, + "teacher_loss": 0.28538602590560913 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.5236458778381348, + "learning_rate": 1.9113777649269914e-05, + "loss": 0.3086, + "step": 4407, + "teacher_loss": 0.2846558094024658 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.7959356307983398, + "learning_rate": 1.9118114789648692e-05, + "loss": 0.2892, + "step": 4408, + "teacher_loss": 0.232901930809021 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.11919496953487396, + "learning_rate": 1.912245193002747e-05, + "loss": 0.2229, + "step": 4409, + "teacher_loss": 0.23442482948303223 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.1469697207212448, + "learning_rate": 1.9126789070406247e-05, + "loss": 0.1882, + "step": 4410, + "teacher_loss": 0.19283178448677063 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.6454266309738159, + "learning_rate": 1.913112621078502e-05, + "loss": 0.223, + "step": 4411, + "teacher_loss": 0.17607924342155457 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.4967457354068756, + "learning_rate": 1.91354633511638e-05, + "loss": 0.2718, + "step": 4412, + "teacher_loss": 0.24685239791870117 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.20355309545993805, + "learning_rate": 1.9139800491542577e-05, + "loss": 0.1974, + "step": 4413, + "teacher_loss": 0.19666561484336853 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.5515495538711548, + "learning_rate": 1.9144137631921354e-05, + "loss": 0.2753, + "step": 4414, + "teacher_loss": 0.24464194476604462 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.7646429538726807, + "learning_rate": 1.9148474772300132e-05, + "loss": 0.3109, + "step": 4415, + "teacher_loss": 0.26045793294906616 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.48189833760261536, + "learning_rate": 1.915281191267891e-05, + "loss": 0.2156, + "step": 4416, + "teacher_loss": 0.18598628044128418 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.23272691667079926, + "learning_rate": 1.9157149053057687e-05, + "loss": 0.1379, + "step": 4417, + "teacher_loss": 0.12737099826335907 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.1874842494726181, + "learning_rate": 1.916148619343646e-05, + "loss": 0.1954, + "step": 4418, + "teacher_loss": 0.1963045299053192 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 1.0665611028671265, + "learning_rate": 1.9165823333815236e-05, + "loss": 0.2963, + "step": 4419, + "teacher_loss": 0.2107498198747635 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.4177074432373047, + "learning_rate": 1.9170160474194013e-05, + "loss": 0.2361, + "step": 4420, + "teacher_loss": 0.215923011302948 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.3376600742340088, + "learning_rate": 1.917449761457279e-05, + "loss": 0.1924, + "step": 4421, + "teacher_loss": 0.1762087643146515 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.4542146921157837, + "learning_rate": 1.917883475495157e-05, + "loss": 0.3138, + "step": 4422, + "teacher_loss": 0.29821836948394775 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.466405987739563, + "learning_rate": 1.9183171895330346e-05, + "loss": 0.2414, + "step": 4423, + "teacher_loss": 0.21644729375839233 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.427219420671463, + "learning_rate": 1.9187509035709124e-05, + "loss": 0.2153, + "step": 4424, + "teacher_loss": 0.19176019728183746 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.40870997309684753, + "learning_rate": 1.91918461760879e-05, + "loss": 0.2934, + "step": 4425, + "teacher_loss": 0.28061914443969727 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.25932276248931885, + "learning_rate": 1.919618331646668e-05, + "loss": 0.1597, + "step": 4426, + "teacher_loss": 0.14860644936561584 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.29136013984680176, + "learning_rate": 1.9200520456845457e-05, + "loss": 0.2701, + "step": 4427, + "teacher_loss": 0.26779091358184814 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.16029603779315948, + "learning_rate": 1.920485759722423e-05, + "loss": 0.1946, + "step": 4428, + "teacher_loss": 0.19842372834682465 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.4553366005420685, + "learning_rate": 1.9209194737603005e-05, + "loss": 0.2888, + "step": 4429, + "teacher_loss": 0.27033478021621704 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.49068546295166016, + "learning_rate": 1.9213531877981783e-05, + "loss": 0.2491, + "step": 4430, + "teacher_loss": 0.22228951752185822 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.15509988367557526, + "learning_rate": 1.921786901836056e-05, + "loss": 0.1933, + "step": 4431, + "teacher_loss": 0.19758708775043488 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.5387653708457947, + "learning_rate": 1.9222206158739338e-05, + "loss": 0.2188, + "step": 4432, + "teacher_loss": 0.18327876925468445 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.3255700469017029, + "learning_rate": 1.9226543299118115e-05, + "loss": 0.1667, + "step": 4433, + "teacher_loss": 0.14904238283634186 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.6602993607521057, + "learning_rate": 1.9230880439496893e-05, + "loss": 0.2633, + "step": 4434, + "teacher_loss": 0.21921959519386292 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.29587626457214355, + "learning_rate": 1.923521757987567e-05, + "loss": 0.3065, + "step": 4435, + "teacher_loss": 0.3076714277267456 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.32020118832588196, + "learning_rate": 1.923955472025445e-05, + "loss": 0.2694, + "step": 4436, + "teacher_loss": 0.2637360692024231 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.4616737961769104, + "learning_rate": 1.9243891860633223e-05, + "loss": 0.2317, + "step": 4437, + "teacher_loss": 0.2061809003353119 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.49196869134902954, + "learning_rate": 1.9248229001012e-05, + "loss": 0.2708, + "step": 4438, + "teacher_loss": 0.24619725346565247 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.7121244072914124, + "learning_rate": 1.9252566141390774e-05, + "loss": 0.2834, + "step": 4439, + "teacher_loss": 0.2357184737920761 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.2487257868051529, + "learning_rate": 1.9256903281769552e-05, + "loss": 0.2492, + "step": 4440, + "teacher_loss": 0.2492329627275467 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 1.1728515625, + "learning_rate": 1.926124042214833e-05, + "loss": 0.443, + "step": 4441, + "teacher_loss": 0.3619272708892822 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.09628164768218994, + "learning_rate": 1.9265577562527107e-05, + "loss": 0.175, + "step": 4442, + "teacher_loss": 0.18369171023368835 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.26564958691596985, + "learning_rate": 1.9269914702905885e-05, + "loss": 0.2133, + "step": 4443, + "teacher_loss": 0.20748114585876465 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.23224981129169464, + "learning_rate": 1.9274251843284663e-05, + "loss": 0.2052, + "step": 4444, + "teacher_loss": 0.2021392285823822 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.23544606566429138, + "learning_rate": 1.927858898366344e-05, + "loss": 0.152, + "step": 4445, + "teacher_loss": 0.14270824193954468 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.4909461736679077, + "learning_rate": 1.9282926124042214e-05, + "loss": 0.2514, + "step": 4446, + "teacher_loss": 0.22475308179855347 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.5512707233428955, + "learning_rate": 1.9287263264420992e-05, + "loss": 0.3369, + "step": 4447, + "teacher_loss": 0.31307852268218994 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.5532106757164001, + "learning_rate": 1.929160040479977e-05, + "loss": 0.1792, + "step": 4448, + "teacher_loss": 0.1376638114452362 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.6336266398429871, + "learning_rate": 1.9295937545178547e-05, + "loss": 0.3304, + "step": 4449, + "teacher_loss": 0.2966945171356201 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.48162052035331726, + "learning_rate": 1.930027468555732e-05, + "loss": 0.2549, + "step": 4450, + "teacher_loss": 0.2297634780406952 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.3254720866680145, + "learning_rate": 1.93046118259361e-05, + "loss": 0.2556, + "step": 4451, + "teacher_loss": 0.247822105884552 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.24370527267456055, + "learning_rate": 1.9308948966314877e-05, + "loss": 0.1881, + "step": 4452, + "teacher_loss": 0.18191999197006226 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.22032126784324646, + "learning_rate": 1.9313286106693654e-05, + "loss": 0.1952, + "step": 4453, + "teacher_loss": 0.1924436092376709 + }, + { + "compression_loss": 0.0, + "epoch": 0.8, + "label_loss": 0.24624371528625488, + "learning_rate": 1.9317623247072432e-05, + "loss": 0.1953, + "step": 4454, + "teacher_loss": 0.18967103958129883 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.6548234224319458, + "learning_rate": 1.9321960387451206e-05, + "loss": 0.2313, + "step": 4455, + "teacher_loss": 0.1841966211795807 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.33025023341178894, + "learning_rate": 1.9326297527829984e-05, + "loss": 0.2939, + "step": 4456, + "teacher_loss": 0.2898273468017578 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.5020718574523926, + "learning_rate": 1.933063466820876e-05, + "loss": 0.2328, + "step": 4457, + "teacher_loss": 0.20286162197589874 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.5606284737586975, + "learning_rate": 1.933497180858754e-05, + "loss": 0.2319, + "step": 4458, + "teacher_loss": 0.1953597366809845 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.4259171485900879, + "learning_rate": 1.9339308948966317e-05, + "loss": 0.2569, + "step": 4459, + "teacher_loss": 0.23806552588939667 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.6561440229415894, + "learning_rate": 1.9343646089345094e-05, + "loss": 0.2967, + "step": 4460, + "teacher_loss": 0.2567846179008484 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.49232596158981323, + "learning_rate": 1.934798322972387e-05, + "loss": 0.1883, + "step": 4461, + "teacher_loss": 0.15450426936149597 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.5243464708328247, + "learning_rate": 1.9352320370102646e-05, + "loss": 0.2819, + "step": 4462, + "teacher_loss": 0.2549181580543518 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.8740881085395813, + "learning_rate": 1.9356657510481424e-05, + "loss": 0.3465, + "step": 4463, + "teacher_loss": 0.28791743516921997 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.5317016839981079, + "learning_rate": 1.9360994650860198e-05, + "loss": 0.1958, + "step": 4464, + "teacher_loss": 0.15848064422607422 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.26249051094055176, + "learning_rate": 1.9365331791238976e-05, + "loss": 0.2518, + "step": 4465, + "teacher_loss": 0.2506164610385895 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.314662367105484, + "learning_rate": 1.9369668931617753e-05, + "loss": 0.238, + "step": 4466, + "teacher_loss": 0.22951368987560272 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.2412196695804596, + "learning_rate": 1.937400607199653e-05, + "loss": 0.2267, + "step": 4467, + "teacher_loss": 0.22508668899536133 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.25784194469451904, + "learning_rate": 1.937834321237531e-05, + "loss": 0.2289, + "step": 4468, + "teacher_loss": 0.22573524713516235 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.19536930322647095, + "learning_rate": 1.9382680352754086e-05, + "loss": 0.2332, + "step": 4469, + "teacher_loss": 0.23738746345043182 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.4496898949146271, + "learning_rate": 1.9387017493132864e-05, + "loss": 0.2513, + "step": 4470, + "teacher_loss": 0.2292943298816681 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.5700011253356934, + "learning_rate": 1.939135463351164e-05, + "loss": 0.2542, + "step": 4471, + "teacher_loss": 0.21911413967609406 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.2527185082435608, + "learning_rate": 1.9395691773890412e-05, + "loss": 0.1704, + "step": 4472, + "teacher_loss": 0.1612669825553894 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.22570839524269104, + "learning_rate": 1.940002891426919e-05, + "loss": 0.2097, + "step": 4473, + "teacher_loss": 0.20792317390441895 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.47496742010116577, + "learning_rate": 1.9404366054647967e-05, + "loss": 0.1577, + "step": 4474, + "teacher_loss": 0.12246362864971161 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.34297117590904236, + "learning_rate": 1.9408703195026745e-05, + "loss": 0.2986, + "step": 4475, + "teacher_loss": 0.29361492395401 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.5019428133964539, + "learning_rate": 1.9413040335405523e-05, + "loss": 0.3957, + "step": 4476, + "teacher_loss": 0.3838716745376587 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.293230801820755, + "learning_rate": 1.94173774757843e-05, + "loss": 0.3029, + "step": 4477, + "teacher_loss": 0.3039575517177582 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.7814189195632935, + "learning_rate": 1.9421714616163078e-05, + "loss": 0.3019, + "step": 4478, + "teacher_loss": 0.24858203530311584 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.34480249881744385, + "learning_rate": 1.9426051756541856e-05, + "loss": 0.1764, + "step": 4479, + "teacher_loss": 0.1577427089214325 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.7515468597412109, + "learning_rate": 1.9430388896920633e-05, + "loss": 0.34, + "step": 4480, + "teacher_loss": 0.29423367977142334 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.21799224615097046, + "learning_rate": 1.9434726037299407e-05, + "loss": 0.2388, + "step": 4481, + "teacher_loss": 0.24106183648109436 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 1.3107478618621826, + "learning_rate": 1.9439063177678185e-05, + "loss": 0.3998, + "step": 4482, + "teacher_loss": 0.29853737354278564 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.48927634954452515, + "learning_rate": 1.944340031805696e-05, + "loss": 0.2174, + "step": 4483, + "teacher_loss": 0.18718752264976501 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.7885831594467163, + "learning_rate": 1.9447737458435737e-05, + "loss": 0.319, + "step": 4484, + "teacher_loss": 0.26680922508239746 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.26493918895721436, + "learning_rate": 1.9452074598814515e-05, + "loss": 0.1657, + "step": 4485, + "teacher_loss": 0.15462952852249146 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.3576187193393707, + "learning_rate": 1.9456411739193292e-05, + "loss": 0.1861, + "step": 4486, + "teacher_loss": 0.16705426573753357 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.727882981300354, + "learning_rate": 1.946074887957207e-05, + "loss": 0.3635, + "step": 4487, + "teacher_loss": 0.3230496048927307 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.6405210494995117, + "learning_rate": 1.9465086019950847e-05, + "loss": 0.231, + "step": 4488, + "teacher_loss": 0.18550699949264526 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.3781351149082184, + "learning_rate": 1.9469423160329625e-05, + "loss": 0.2928, + "step": 4489, + "teacher_loss": 0.283273845911026 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.4769185781478882, + "learning_rate": 1.94737603007084e-05, + "loss": 0.1955, + "step": 4490, + "teacher_loss": 0.16424641013145447 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.35994961857795715, + "learning_rate": 1.9478097441087177e-05, + "loss": 0.2571, + "step": 4491, + "teacher_loss": 0.24570125341415405 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.9124099016189575, + "learning_rate": 1.9482434581465955e-05, + "loss": 0.3447, + "step": 4492, + "teacher_loss": 0.28157228231430054 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.5469323396682739, + "learning_rate": 1.9486771721844732e-05, + "loss": 0.2162, + "step": 4493, + "teacher_loss": 0.1794590950012207 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.6094321012496948, + "learning_rate": 1.9491108862223506e-05, + "loss": 0.2406, + "step": 4494, + "teacher_loss": 0.19964975118637085 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.39998501539230347, + "learning_rate": 1.9495446002602284e-05, + "loss": 0.2119, + "step": 4495, + "teacher_loss": 0.19099244475364685 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.30796992778778076, + "learning_rate": 1.949978314298106e-05, + "loss": 0.1991, + "step": 4496, + "teacher_loss": 0.18694816529750824 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.24383272230625153, + "learning_rate": 1.950412028335984e-05, + "loss": 0.1816, + "step": 4497, + "teacher_loss": 0.17470115423202515 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.9361656904220581, + "learning_rate": 1.9508457423738617e-05, + "loss": 0.3268, + "step": 4498, + "teacher_loss": 0.25906845927238464 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.350935161113739, + "learning_rate": 1.951279456411739e-05, + "loss": 0.1994, + "step": 4499, + "teacher_loss": 0.18253061175346375 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.5836977958679199, + "learning_rate": 1.951713170449617e-05, + "loss": 0.2236, + "step": 4500, + "teacher_loss": 0.1836300939321518 + }, + { + "epoch": 0.81, + "eval_exact_match": 79.68779564806054, + "eval_f1": 87.29877574959808, + "step": 4500 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.3788067698478699, + "learning_rate": 1.9521468844874946e-05, + "loss": 0.2306, + "step": 4501, + "teacher_loss": 0.2141350507736206 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.6703027486801147, + "learning_rate": 1.9525805985253724e-05, + "loss": 0.5627, + "step": 4502, + "teacher_loss": 0.5506966710090637 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.5232012271881104, + "learning_rate": 1.95301431256325e-05, + "loss": 0.2659, + "step": 4503, + "teacher_loss": 0.2373151183128357 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.8341158628463745, + "learning_rate": 1.953448026601128e-05, + "loss": 0.439, + "step": 4504, + "teacher_loss": 0.3950817584991455 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.23976443707942963, + "learning_rate": 1.9538817406390053e-05, + "loss": 0.203, + "step": 4505, + "teacher_loss": 0.19886967539787292 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.8551978468894958, + "learning_rate": 1.954315454676883e-05, + "loss": 0.3792, + "step": 4506, + "teacher_loss": 0.32635772228240967 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.41043850779533386, + "learning_rate": 1.954749168714761e-05, + "loss": 0.211, + "step": 4507, + "teacher_loss": 0.18885841965675354 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 1.0946969985961914, + "learning_rate": 1.9551828827526383e-05, + "loss": 0.3556, + "step": 4508, + "teacher_loss": 0.27352410554885864 + }, + { + "compression_loss": 0.0, + "epoch": 0.81, + "label_loss": 0.9388766288757324, + "learning_rate": 1.955616596790516e-05, + "loss": 0.422, + "step": 4509, + "teacher_loss": 0.3645484447479248 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.27205973863601685, + "learning_rate": 1.9560503108283938e-05, + "loss": 0.1944, + "step": 4510, + "teacher_loss": 0.1857774555683136 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.28105664253234863, + "learning_rate": 1.9564840248662716e-05, + "loss": 0.1694, + "step": 4511, + "teacher_loss": 0.15697333216667175 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.4196627736091614, + "learning_rate": 1.9569177389041493e-05, + "loss": 0.2512, + "step": 4512, + "teacher_loss": 0.23247992992401123 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.49374252557754517, + "learning_rate": 1.957351452942027e-05, + "loss": 0.2479, + "step": 4513, + "teacher_loss": 0.2206338346004486 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.3415621519088745, + "learning_rate": 1.957785166979905e-05, + "loss": 0.2428, + "step": 4514, + "teacher_loss": 0.23177313804626465 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.3653858006000519, + "learning_rate": 1.9582188810177826e-05, + "loss": 0.2333, + "step": 4515, + "teacher_loss": 0.21857471764087677 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.7266503572463989, + "learning_rate": 1.9586525950556597e-05, + "loss": 0.2639, + "step": 4516, + "teacher_loss": 0.21243906021118164 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.30429768562316895, + "learning_rate": 1.9590863090935375e-05, + "loss": 0.1938, + "step": 4517, + "teacher_loss": 0.18154282867908478 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.33662354946136475, + "learning_rate": 1.9595200231314152e-05, + "loss": 0.2115, + "step": 4518, + "teacher_loss": 0.19755561649799347 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.41198205947875977, + "learning_rate": 1.959953737169293e-05, + "loss": 0.2634, + "step": 4519, + "teacher_loss": 0.24690744280815125 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.24396567046642303, + "learning_rate": 1.9603874512071708e-05, + "loss": 0.1828, + "step": 4520, + "teacher_loss": 0.17596638202667236 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.3098486065864563, + "learning_rate": 1.9608211652450485e-05, + "loss": 0.2258, + "step": 4521, + "teacher_loss": 0.21641576290130615 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.4102702736854553, + "learning_rate": 1.9612548792829263e-05, + "loss": 0.2663, + "step": 4522, + "teacher_loss": 0.25027284026145935 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.6989509463310242, + "learning_rate": 1.961688593320804e-05, + "loss": 0.2886, + "step": 4523, + "teacher_loss": 0.2429679036140442 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.7013245820999146, + "learning_rate": 1.9621223073586818e-05, + "loss": 0.2614, + "step": 4524, + "teacher_loss": 0.2124658226966858 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.2899213433265686, + "learning_rate": 1.9625560213965592e-05, + "loss": 0.2247, + "step": 4525, + "teacher_loss": 0.21745355427265167 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.30669698119163513, + "learning_rate": 1.962989735434437e-05, + "loss": 0.1849, + "step": 4526, + "teacher_loss": 0.17131514847278595 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.2988119423389435, + "learning_rate": 1.9634234494723144e-05, + "loss": 0.2646, + "step": 4527, + "teacher_loss": 0.2607831358909607 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.34778010845184326, + "learning_rate": 1.9638571635101922e-05, + "loss": 0.2476, + "step": 4528, + "teacher_loss": 0.23642557859420776 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.8675686120986938, + "learning_rate": 1.96429087754807e-05, + "loss": 0.2739, + "step": 4529, + "teacher_loss": 0.20796382427215576 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.16270476579666138, + "learning_rate": 1.9647245915859477e-05, + "loss": 0.1877, + "step": 4530, + "teacher_loss": 0.19051004946231842 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.3909509778022766, + "learning_rate": 1.9651583056238255e-05, + "loss": 0.3031, + "step": 4531, + "teacher_loss": 0.29334208369255066 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.6295802593231201, + "learning_rate": 1.9655920196617032e-05, + "loss": 0.3265, + "step": 4532, + "teacher_loss": 0.2928031086921692 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.3191392123699188, + "learning_rate": 1.966025733699581e-05, + "loss": 0.1999, + "step": 4533, + "teacher_loss": 0.18661819398403168 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.23553380370140076, + "learning_rate": 1.9664594477374584e-05, + "loss": 0.2233, + "step": 4534, + "teacher_loss": 0.2219860553741455 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.5289946794509888, + "learning_rate": 1.9668931617753362e-05, + "loss": 0.2623, + "step": 4535, + "teacher_loss": 0.23262135684490204 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.7444685697555542, + "learning_rate": 1.967326875813214e-05, + "loss": 0.2742, + "step": 4536, + "teacher_loss": 0.22191445529460907 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.14221107959747314, + "learning_rate": 1.9677605898510914e-05, + "loss": 0.1563, + "step": 4537, + "teacher_loss": 0.1578819453716278 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.26338934898376465, + "learning_rate": 1.968194303888969e-05, + "loss": 0.2477, + "step": 4538, + "teacher_loss": 0.24596011638641357 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.4069359302520752, + "learning_rate": 1.968628017926847e-05, + "loss": 0.2683, + "step": 4539, + "teacher_loss": 0.2529076337814331 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.6068147420883179, + "learning_rate": 1.9690617319647246e-05, + "loss": 0.2619, + "step": 4540, + "teacher_loss": 0.2235943078994751 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.6759498119354248, + "learning_rate": 1.9694954460026024e-05, + "loss": 0.34, + "step": 4541, + "teacher_loss": 0.30271345376968384 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.5406700968742371, + "learning_rate": 1.9699291600404802e-05, + "loss": 0.3102, + "step": 4542, + "teacher_loss": 0.28463542461395264 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.9173877835273743, + "learning_rate": 1.9703628740783576e-05, + "loss": 0.3389, + "step": 4543, + "teacher_loss": 0.2745884656906128 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.5685233473777771, + "learning_rate": 1.9707965881162354e-05, + "loss": 0.3034, + "step": 4544, + "teacher_loss": 0.2739788889884949 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.2999640703201294, + "learning_rate": 1.971230302154113e-05, + "loss": 0.3097, + "step": 4545, + "teacher_loss": 0.3108366131782532 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.3911818861961365, + "learning_rate": 1.971664016191991e-05, + "loss": 0.2303, + "step": 4546, + "teacher_loss": 0.2124561369419098 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.5935028195381165, + "learning_rate": 1.9720977302298686e-05, + "loss": 0.3666, + "step": 4547, + "teacher_loss": 0.3413691520690918 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.29289913177490234, + "learning_rate": 1.972531444267746e-05, + "loss": 0.2541, + "step": 4548, + "teacher_loss": 0.2497403621673584 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.4865691661834717, + "learning_rate": 1.972965158305624e-05, + "loss": 0.2662, + "step": 4549, + "teacher_loss": 0.24169203639030457 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.3387758731842041, + "learning_rate": 1.9733988723435016e-05, + "loss": 0.2533, + "step": 4550, + "teacher_loss": 0.2437693178653717 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.24636882543563843, + "learning_rate": 1.9738325863813794e-05, + "loss": 0.1882, + "step": 4551, + "teacher_loss": 0.18174898624420166 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.3845309019088745, + "learning_rate": 1.9742663004192568e-05, + "loss": 0.2156, + "step": 4552, + "teacher_loss": 0.1968461126089096 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.5481349229812622, + "learning_rate": 1.9747000144571345e-05, + "loss": 0.2311, + "step": 4553, + "teacher_loss": 0.1958249807357788 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.4277880787849426, + "learning_rate": 1.9751337284950123e-05, + "loss": 0.2467, + "step": 4554, + "teacher_loss": 0.2265796661376953 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.32164761424064636, + "learning_rate": 1.97556744253289e-05, + "loss": 0.2445, + "step": 4555, + "teacher_loss": 0.2359553426504135 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.3364228904247284, + "learning_rate": 1.9760011565707678e-05, + "loss": 0.2191, + "step": 4556, + "teacher_loss": 0.20606756210327148 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.6412774324417114, + "learning_rate": 1.9764348706086456e-05, + "loss": 0.2744, + "step": 4557, + "teacher_loss": 0.2336086928844452 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.9273658990859985, + "learning_rate": 1.9768685846465234e-05, + "loss": 0.3519, + "step": 4558, + "teacher_loss": 0.288008451461792 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.2052639126777649, + "learning_rate": 1.9773022986844008e-05, + "loss": 0.2146, + "step": 4559, + "teacher_loss": 0.2156861126422882 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.9939955472946167, + "learning_rate": 1.9777360127222782e-05, + "loss": 0.3771, + "step": 4560, + "teacher_loss": 0.30852556228637695 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.12772300839424133, + "learning_rate": 1.978169726760156e-05, + "loss": 0.1706, + "step": 4561, + "teacher_loss": 0.17534644901752472 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.3508272171020508, + "learning_rate": 1.9786034407980337e-05, + "loss": 0.2109, + "step": 4562, + "teacher_loss": 0.19531863927841187 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.3844001293182373, + "learning_rate": 1.9790371548359115e-05, + "loss": 0.288, + "step": 4563, + "teacher_loss": 0.27726036310195923 + }, + { + "compression_loss": 0.0, + "epoch": 0.82, + "label_loss": 0.26909855008125305, + "learning_rate": 1.9794708688737892e-05, + "loss": 0.1956, + "step": 4564, + "teacher_loss": 0.1874411702156067 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.6861876845359802, + "learning_rate": 1.979904582911667e-05, + "loss": 0.2015, + "step": 4565, + "teacher_loss": 0.14760680496692657 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.3776729106903076, + "learning_rate": 1.9803382969495448e-05, + "loss": 0.2861, + "step": 4566, + "teacher_loss": 0.2758902609348297 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.3799261748790741, + "learning_rate": 1.9807720109874225e-05, + "loss": 0.3281, + "step": 4567, + "teacher_loss": 0.32237547636032104 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.422038733959198, + "learning_rate": 1.9812057250253003e-05, + "loss": 0.1751, + "step": 4568, + "teacher_loss": 0.14764931797981262 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.5399374961853027, + "learning_rate": 1.9816394390631777e-05, + "loss": 0.2467, + "step": 4569, + "teacher_loss": 0.21413882076740265 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.3122031092643738, + "learning_rate": 1.982073153101055e-05, + "loss": 0.2259, + "step": 4570, + "teacher_loss": 0.216264545917511 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.5072436332702637, + "learning_rate": 1.982506867138933e-05, + "loss": 0.2367, + "step": 4571, + "teacher_loss": 0.20661015808582306 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.293565034866333, + "learning_rate": 1.9829405811768107e-05, + "loss": 0.2079, + "step": 4572, + "teacher_loss": 0.19843250513076782 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.565199077129364, + "learning_rate": 1.9833742952146884e-05, + "loss": 0.2299, + "step": 4573, + "teacher_loss": 0.19258946180343628 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.2277761697769165, + "learning_rate": 1.9838080092525662e-05, + "loss": 0.1966, + "step": 4574, + "teacher_loss": 0.1931137591600418 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.3999192714691162, + "learning_rate": 1.984241723290444e-05, + "loss": 0.1918, + "step": 4575, + "teacher_loss": 0.16867533326148987 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.49854612350463867, + "learning_rate": 1.9846754373283217e-05, + "loss": 0.295, + "step": 4576, + "teacher_loss": 0.2723630666732788 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.4591091275215149, + "learning_rate": 1.9851091513661995e-05, + "loss": 0.3934, + "step": 4577, + "teacher_loss": 0.3860475718975067 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.6976302862167358, + "learning_rate": 1.985542865404077e-05, + "loss": 0.3109, + "step": 4578, + "teacher_loss": 0.2679854929447174 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.518950879573822, + "learning_rate": 1.9859765794419547e-05, + "loss": 0.4015, + "step": 4579, + "teacher_loss": 0.3884930908679962 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.3135051727294922, + "learning_rate": 1.9864102934798324e-05, + "loss": 0.2069, + "step": 4580, + "teacher_loss": 0.19507566094398499 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.34679627418518066, + "learning_rate": 1.98684400751771e-05, + "loss": 0.2058, + "step": 4581, + "teacher_loss": 0.1901879608631134 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.17885328829288483, + "learning_rate": 1.9872777215555876e-05, + "loss": 0.1553, + "step": 4582, + "teacher_loss": 0.15265792608261108 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.33397477865219116, + "learning_rate": 1.9877114355934654e-05, + "loss": 0.2942, + "step": 4583, + "teacher_loss": 0.2897806763648987 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.1667826622724533, + "learning_rate": 1.988145149631343e-05, + "loss": 0.2487, + "step": 4584, + "teacher_loss": 0.25777795910835266 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.44153517484664917, + "learning_rate": 1.988578863669221e-05, + "loss": 0.2454, + "step": 4585, + "teacher_loss": 0.22357740998268127 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.6416473984718323, + "learning_rate": 1.9890125777070987e-05, + "loss": 0.2902, + "step": 4586, + "teacher_loss": 0.25120314955711365 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.32736456394195557, + "learning_rate": 1.989446291744976e-05, + "loss": 0.2237, + "step": 4587, + "teacher_loss": 0.21213370561599731 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.2532444894313812, + "learning_rate": 1.989880005782854e-05, + "loss": 0.2629, + "step": 4588, + "teacher_loss": 0.26399821043014526 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.41060197353363037, + "learning_rate": 1.9903137198207316e-05, + "loss": 0.2325, + "step": 4589, + "teacher_loss": 0.2126852124929428 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.42801234126091003, + "learning_rate": 1.9907474338586094e-05, + "loss": 0.2545, + "step": 4590, + "teacher_loss": 0.23517832159996033 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.2867772877216339, + "learning_rate": 1.991181147896487e-05, + "loss": 0.2157, + "step": 4591, + "teacher_loss": 0.20779916644096375 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.2646481692790985, + "learning_rate": 1.9916148619343646e-05, + "loss": 0.2082, + "step": 4592, + "teacher_loss": 0.20197591185569763 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.6676338911056519, + "learning_rate": 1.9920485759722423e-05, + "loss": 0.2677, + "step": 4593, + "teacher_loss": 0.2232159674167633 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.2056477963924408, + "learning_rate": 1.99248229001012e-05, + "loss": 0.2409, + "step": 4594, + "teacher_loss": 0.24477984011173248 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.4429328143596649, + "learning_rate": 1.992916004047998e-05, + "loss": 0.2565, + "step": 4595, + "teacher_loss": 0.2358298897743225 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.5248163938522339, + "learning_rate": 1.9933497180858753e-05, + "loss": 0.2305, + "step": 4596, + "teacher_loss": 0.19779440760612488 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.5143501162528992, + "learning_rate": 1.993783432123753e-05, + "loss": 0.1966, + "step": 4597, + "teacher_loss": 0.16128087043762207 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.27569353580474854, + "learning_rate": 1.9942171461616308e-05, + "loss": 0.2339, + "step": 4598, + "teacher_loss": 0.22930589318275452 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.28145936131477356, + "learning_rate": 1.9946508601995086e-05, + "loss": 0.3269, + "step": 4599, + "teacher_loss": 0.33198282122612 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.25473153591156006, + "learning_rate": 1.9950845742373863e-05, + "loss": 0.1541, + "step": 4600, + "teacher_loss": 0.14293237030506134 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.6630728840827942, + "learning_rate": 1.995518288275264e-05, + "loss": 0.297, + "step": 4601, + "teacher_loss": 0.256326287984848 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.38394030928611755, + "learning_rate": 1.995952002313142e-05, + "loss": 0.182, + "step": 4602, + "teacher_loss": 0.15960556268692017 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.28238391876220703, + "learning_rate": 1.9963857163510193e-05, + "loss": 0.2436, + "step": 4603, + "teacher_loss": 0.23925140500068665 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.59486985206604, + "learning_rate": 1.9968194303888967e-05, + "loss": 0.2538, + "step": 4604, + "teacher_loss": 0.21590998768806458 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.18063107132911682, + "learning_rate": 1.9972531444267744e-05, + "loss": 0.238, + "step": 4605, + "teacher_loss": 0.24436254799365997 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.18290956318378448, + "learning_rate": 1.9976868584646522e-05, + "loss": 0.1972, + "step": 4606, + "teacher_loss": 0.19878965616226196 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.34946557879447937, + "learning_rate": 1.99812057250253e-05, + "loss": 0.2433, + "step": 4607, + "teacher_loss": 0.23149235546588898 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.38810551166534424, + "learning_rate": 1.9985542865404077e-05, + "loss": 0.2659, + "step": 4608, + "teacher_loss": 0.25229862332344055 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.13445571064949036, + "learning_rate": 1.9989880005782855e-05, + "loss": 0.1525, + "step": 4609, + "teacher_loss": 0.15454548597335815 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.5226759314537048, + "learning_rate": 1.9994217146161633e-05, + "loss": 0.2731, + "step": 4610, + "teacher_loss": 0.24531565606594086 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.5696532726287842, + "learning_rate": 1.999855428654041e-05, + "loss": 0.2858, + "step": 4611, + "teacher_loss": 0.25429433584213257 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.2213011384010315, + "learning_rate": 2.0002891426919188e-05, + "loss": 0.176, + "step": 4612, + "teacher_loss": 0.17096082866191864 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.4153340458869934, + "learning_rate": 2.0007228567297962e-05, + "loss": 0.2592, + "step": 4613, + "teacher_loss": 0.24187862873077393 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.2847558557987213, + "learning_rate": 2.0011565707676736e-05, + "loss": 0.2076, + "step": 4614, + "teacher_loss": 0.19904077053070068 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.567826509475708, + "learning_rate": 2.0015902848055514e-05, + "loss": 0.253, + "step": 4615, + "teacher_loss": 0.21798181533813477 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.4530789852142334, + "learning_rate": 2.002023998843429e-05, + "loss": 0.2934, + "step": 4616, + "teacher_loss": 0.27566760778427124 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.748852550983429, + "learning_rate": 2.002457712881307e-05, + "loss": 0.4632, + "step": 4617, + "teacher_loss": 0.43142637610435486 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.43120384216308594, + "learning_rate": 2.0028914269191847e-05, + "loss": 0.2315, + "step": 4618, + "teacher_loss": 0.20936301350593567 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.6893396377563477, + "learning_rate": 2.0033251409570624e-05, + "loss": 0.2862, + "step": 4619, + "teacher_loss": 0.2414306104183197 + }, + { + "compression_loss": 0.0, + "epoch": 0.83, + "label_loss": 0.7687826156616211, + "learning_rate": 2.0037588549949402e-05, + "loss": 0.349, + "step": 4620, + "teacher_loss": 0.30236169695854187 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.30291542410850525, + "learning_rate": 2.004192569032818e-05, + "loss": 0.1647, + "step": 4621, + "teacher_loss": 0.14930902421474457 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.7168831825256348, + "learning_rate": 2.0046262830706954e-05, + "loss": 0.2702, + "step": 4622, + "teacher_loss": 0.2206028550863266 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.14678002893924713, + "learning_rate": 2.005059997108573e-05, + "loss": 0.1741, + "step": 4623, + "teacher_loss": 0.17716410756111145 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.7131956219673157, + "learning_rate": 2.005493711146451e-05, + "loss": 0.2812, + "step": 4624, + "teacher_loss": 0.2332216054201126 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.4446783661842346, + "learning_rate": 2.0059274251843283e-05, + "loss": 0.3172, + "step": 4625, + "teacher_loss": 0.3030606806278229 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.5094561576843262, + "learning_rate": 2.006361139222206e-05, + "loss": 0.2817, + "step": 4626, + "teacher_loss": 0.256378173828125 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.19025397300720215, + "learning_rate": 2.006794853260084e-05, + "loss": 0.3337, + "step": 4627, + "teacher_loss": 0.3495849370956421 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.22981694340705872, + "learning_rate": 2.0072285672979616e-05, + "loss": 0.2523, + "step": 4628, + "teacher_loss": 0.25481337308883667 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.505085825920105, + "learning_rate": 2.0076622813358394e-05, + "loss": 0.2327, + "step": 4629, + "teacher_loss": 0.20246979594230652 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.28029829263687134, + "learning_rate": 2.008095995373717e-05, + "loss": 0.2025, + "step": 4630, + "teacher_loss": 0.1938786804676056 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.4046053886413574, + "learning_rate": 2.0085297094115946e-05, + "loss": 0.247, + "step": 4631, + "teacher_loss": 0.22950223088264465 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.6568427681922913, + "learning_rate": 2.0089634234494723e-05, + "loss": 0.2357, + "step": 4632, + "teacher_loss": 0.1888718605041504 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.4127659797668457, + "learning_rate": 2.00939713748735e-05, + "loss": 0.4528, + "step": 4633, + "teacher_loss": 0.45723968744277954 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.6477503776550293, + "learning_rate": 2.009830851525228e-05, + "loss": 0.2743, + "step": 4634, + "teacher_loss": 0.23280251026153564 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.0879109799861908, + "learning_rate": 2.0102645655631053e-05, + "loss": 0.1873, + "step": 4635, + "teacher_loss": 0.1983334869146347 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.7636901140213013, + "learning_rate": 2.010698279600983e-05, + "loss": 0.2582, + "step": 4636, + "teacher_loss": 0.20200307667255402 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.30433574318885803, + "learning_rate": 2.0111319936388608e-05, + "loss": 0.1881, + "step": 4637, + "teacher_loss": 0.17519734799861908 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.40762999653816223, + "learning_rate": 2.0115657076767386e-05, + "loss": 0.2769, + "step": 4638, + "teacher_loss": 0.26240506768226624 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.33742058277130127, + "learning_rate": 2.0119994217146163e-05, + "loss": 0.2132, + "step": 4639, + "teacher_loss": 0.1993464231491089 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.4958832859992981, + "learning_rate": 2.0124331357524938e-05, + "loss": 0.4503, + "step": 4640, + "teacher_loss": 0.4452378749847412 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.5890842080116272, + "learning_rate": 2.0128668497903715e-05, + "loss": 0.2796, + "step": 4641, + "teacher_loss": 0.24525412917137146 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.4865787625312805, + "learning_rate": 2.0133005638282493e-05, + "loss": 0.3788, + "step": 4642, + "teacher_loss": 0.3668323755264282 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.5590130090713501, + "learning_rate": 2.013734277866127e-05, + "loss": 0.2197, + "step": 4643, + "teacher_loss": 0.18195965886116028 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.39190101623535156, + "learning_rate": 2.0141679919040048e-05, + "loss": 0.2175, + "step": 4644, + "teacher_loss": 0.19811929762363434 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.6120504140853882, + "learning_rate": 2.0146017059418826e-05, + "loss": 0.2305, + "step": 4645, + "teacher_loss": 0.18812735378742218 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.39701128005981445, + "learning_rate": 2.01503541997976e-05, + "loss": 0.209, + "step": 4646, + "teacher_loss": 0.18812063336372375 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.416189044713974, + "learning_rate": 2.0154691340176378e-05, + "loss": 0.229, + "step": 4647, + "teacher_loss": 0.2081766426563263 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.4827533960342407, + "learning_rate": 2.0159028480555155e-05, + "loss": 0.3182, + "step": 4648, + "teacher_loss": 0.29995983839035034 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.2939597964286804, + "learning_rate": 2.016336562093393e-05, + "loss": 0.2501, + "step": 4649, + "teacher_loss": 0.2452666312456131 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.19988112151622772, + "learning_rate": 2.0167702761312707e-05, + "loss": 0.178, + "step": 4650, + "teacher_loss": 0.17560827732086182 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.5346502065658569, + "learning_rate": 2.0172039901691485e-05, + "loss": 0.3132, + "step": 4651, + "teacher_loss": 0.28857722878456116 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.8323071002960205, + "learning_rate": 2.0176377042070262e-05, + "loss": 0.3608, + "step": 4652, + "teacher_loss": 0.30846092104911804 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.47282472252845764, + "learning_rate": 2.018071418244904e-05, + "loss": 0.251, + "step": 4653, + "teacher_loss": 0.226351797580719 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.18234100937843323, + "learning_rate": 2.0185051322827817e-05, + "loss": 0.1523, + "step": 4654, + "teacher_loss": 0.14896777272224426 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.7059367895126343, + "learning_rate": 2.0189388463206595e-05, + "loss": 0.3869, + "step": 4655, + "teacher_loss": 0.351399302482605 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.3463664650917053, + "learning_rate": 2.0193725603585373e-05, + "loss": 0.3583, + "step": 4656, + "teacher_loss": 0.3596689701080322 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.2776446044445038, + "learning_rate": 2.0198062743964144e-05, + "loss": 0.2119, + "step": 4657, + "teacher_loss": 0.20459237694740295 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.3087308406829834, + "learning_rate": 2.020239988434292e-05, + "loss": 0.2, + "step": 4658, + "teacher_loss": 0.18793678283691406 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.6109752655029297, + "learning_rate": 2.02067370247217e-05, + "loss": 0.2374, + "step": 4659, + "teacher_loss": 0.19585120677947998 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.5284692049026489, + "learning_rate": 2.0211074165100476e-05, + "loss": 0.2099, + "step": 4660, + "teacher_loss": 0.1745302379131317 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.7406220436096191, + "learning_rate": 2.0215411305479254e-05, + "loss": 0.2921, + "step": 4661, + "teacher_loss": 0.24231459200382233 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.7132091522216797, + "learning_rate": 2.021974844585803e-05, + "loss": 0.2869, + "step": 4662, + "teacher_loss": 0.23954693973064423 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.40481775999069214, + "learning_rate": 2.022408558623681e-05, + "loss": 0.2464, + "step": 4663, + "teacher_loss": 0.22883744537830353 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.9858428835868835, + "learning_rate": 2.0228422726615587e-05, + "loss": 0.3668, + "step": 4664, + "teacher_loss": 0.29804179072380066 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.21836432814598083, + "learning_rate": 2.0232759866994365e-05, + "loss": 0.2111, + "step": 4665, + "teacher_loss": 0.2102908492088318 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.37891945242881775, + "learning_rate": 2.023709700737314e-05, + "loss": 0.2736, + "step": 4666, + "teacher_loss": 0.2619180679321289 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.4839324951171875, + "learning_rate": 2.0241434147751916e-05, + "loss": 0.231, + "step": 4667, + "teacher_loss": 0.20291519165039062 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.49740272760391235, + "learning_rate": 2.024577128813069e-05, + "loss": 0.216, + "step": 4668, + "teacher_loss": 0.184719979763031 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.429978609085083, + "learning_rate": 2.0250108428509468e-05, + "loss": 0.2008, + "step": 4669, + "teacher_loss": 0.1753380298614502 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.833977997303009, + "learning_rate": 2.0254445568888246e-05, + "loss": 0.2871, + "step": 4670, + "teacher_loss": 0.22636312246322632 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.6584421396255493, + "learning_rate": 2.0258782709267024e-05, + "loss": 0.2628, + "step": 4671, + "teacher_loss": 0.2188015580177307 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.18187645077705383, + "learning_rate": 2.02631198496458e-05, + "loss": 0.2039, + "step": 4672, + "teacher_loss": 0.20632772147655487 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.22288449108600616, + "learning_rate": 2.026745699002458e-05, + "loss": 0.199, + "step": 4673, + "teacher_loss": 0.19632232189178467 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.6050679087638855, + "learning_rate": 2.0271794130403356e-05, + "loss": 0.284, + "step": 4674, + "teacher_loss": 0.24833041429519653 + }, + { + "compression_loss": 0.0, + "epoch": 0.84, + "label_loss": 0.35629603266716003, + "learning_rate": 2.027613127078213e-05, + "loss": 0.2249, + "step": 4675, + "teacher_loss": 0.21033364534378052 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.36245405673980713, + "learning_rate": 2.0280468411160908e-05, + "loss": 0.2791, + "step": 4676, + "teacher_loss": 0.26981133222579956 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.1875852793455124, + "learning_rate": 2.0284805551539686e-05, + "loss": 0.194, + "step": 4677, + "teacher_loss": 0.19476372003555298 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.32758450508117676, + "learning_rate": 2.0289142691918463e-05, + "loss": 0.2004, + "step": 4678, + "teacher_loss": 0.18627595901489258 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.7231752872467041, + "learning_rate": 2.0293479832297238e-05, + "loss": 0.2664, + "step": 4679, + "teacher_loss": 0.21566519141197205 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.3958778381347656, + "learning_rate": 2.0297816972676015e-05, + "loss": 0.2049, + "step": 4680, + "teacher_loss": 0.18370817601680756 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.8475362658500671, + "learning_rate": 2.0302154113054793e-05, + "loss": 0.3372, + "step": 4681, + "teacher_loss": 0.2804933786392212 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.40638116002082825, + "learning_rate": 2.030649125343357e-05, + "loss": 0.3294, + "step": 4682, + "teacher_loss": 0.320858895778656 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.5833108425140381, + "learning_rate": 2.0310828393812348e-05, + "loss": 0.2914, + "step": 4683, + "teacher_loss": 0.2590058147907257 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.3511325418949127, + "learning_rate": 2.0315165534191122e-05, + "loss": 0.227, + "step": 4684, + "teacher_loss": 0.21316352486610413 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.7975261807441711, + "learning_rate": 2.03195026745699e-05, + "loss": 0.316, + "step": 4685, + "teacher_loss": 0.26249682903289795 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.5050575137138367, + "learning_rate": 2.0323839814948678e-05, + "loss": 0.2924, + "step": 4686, + "teacher_loss": 0.2688046097755432 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.820890486240387, + "learning_rate": 2.0328176955327455e-05, + "loss": 0.3725, + "step": 4687, + "teacher_loss": 0.32263654470443726 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.3852985203266144, + "learning_rate": 2.0332514095706233e-05, + "loss": 0.257, + "step": 4688, + "teacher_loss": 0.24279460310935974 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.34823161363601685, + "learning_rate": 2.033685123608501e-05, + "loss": 0.1979, + "step": 4689, + "teacher_loss": 0.1811620444059372 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.4771634638309479, + "learning_rate": 2.0341188376463785e-05, + "loss": 0.307, + "step": 4690, + "teacher_loss": 0.2881277799606323 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.4022625684738159, + "learning_rate": 2.0345525516842562e-05, + "loss": 0.3098, + "step": 4691, + "teacher_loss": 0.2995000183582306 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.36342519521713257, + "learning_rate": 2.034986265722134e-05, + "loss": 0.2023, + "step": 4692, + "teacher_loss": 0.18435952067375183 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.8731607794761658, + "learning_rate": 2.0354199797600114e-05, + "loss": 0.2817, + "step": 4693, + "teacher_loss": 0.2159455120563507 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.350460410118103, + "learning_rate": 2.0358536937978892e-05, + "loss": 0.2447, + "step": 4694, + "teacher_loss": 0.23291495442390442 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.10431374609470367, + "learning_rate": 2.036287407835767e-05, + "loss": 0.1775, + "step": 4695, + "teacher_loss": 0.18567220866680145 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.32678449153900146, + "learning_rate": 2.0367211218736447e-05, + "loss": 0.2206, + "step": 4696, + "teacher_loss": 0.20879864692687988 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.4416809380054474, + "learning_rate": 2.0371548359115225e-05, + "loss": 0.3525, + "step": 4697, + "teacher_loss": 0.34253618121147156 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.410899817943573, + "learning_rate": 2.0375885499494002e-05, + "loss": 0.3245, + "step": 4698, + "teacher_loss": 0.3148714303970337 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.40641552209854126, + "learning_rate": 2.038022263987278e-05, + "loss": 0.2325, + "step": 4699, + "teacher_loss": 0.21312814950942993 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.24704644083976746, + "learning_rate": 2.0384559780251558e-05, + "loss": 0.2203, + "step": 4700, + "teacher_loss": 0.21727995574474335 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.20583905279636383, + "learning_rate": 2.038889692063033e-05, + "loss": 0.2012, + "step": 4701, + "teacher_loss": 0.2007027566432953 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.17191235721111298, + "learning_rate": 2.0393234061009106e-05, + "loss": 0.2109, + "step": 4702, + "teacher_loss": 0.2152131199836731 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.2472323328256607, + "learning_rate": 2.0397571201387884e-05, + "loss": 0.2013, + "step": 4703, + "teacher_loss": 0.1961841881275177 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.7080936431884766, + "learning_rate": 2.040190834176666e-05, + "loss": 0.3386, + "step": 4704, + "teacher_loss": 0.2975391447544098 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.32597437500953674, + "learning_rate": 2.040624548214544e-05, + "loss": 0.221, + "step": 4705, + "teacher_loss": 0.20930588245391846 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.43104469776153564, + "learning_rate": 2.0410582622524217e-05, + "loss": 0.2809, + "step": 4706, + "teacher_loss": 0.2642444372177124 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.2714369297027588, + "learning_rate": 2.0414919762902994e-05, + "loss": 0.1665, + "step": 4707, + "teacher_loss": 0.15481694042682648 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.4421897232532501, + "learning_rate": 2.0419256903281772e-05, + "loss": 0.4849, + "step": 4708, + "teacher_loss": 0.4896079897880554 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.16248080134391785, + "learning_rate": 2.042359404366055e-05, + "loss": 0.1535, + "step": 4709, + "teacher_loss": 0.15255028009414673 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.45376864075660706, + "learning_rate": 2.0427931184039324e-05, + "loss": 0.2726, + "step": 4710, + "teacher_loss": 0.2524913549423218 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 1.007807731628418, + "learning_rate": 2.04322683244181e-05, + "loss": 0.2875, + "step": 4711, + "teacher_loss": 0.20751619338989258 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.3113197088241577, + "learning_rate": 2.0436605464796876e-05, + "loss": 0.2072, + "step": 4712, + "teacher_loss": 0.19564369320869446 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.5506263971328735, + "learning_rate": 2.0440942605175653e-05, + "loss": 0.269, + "step": 4713, + "teacher_loss": 0.2377415895462036 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.4580322504043579, + "learning_rate": 2.044527974555443e-05, + "loss": 0.2799, + "step": 4714, + "teacher_loss": 0.2601166367530823 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.3905421495437622, + "learning_rate": 2.044961688593321e-05, + "loss": 0.198, + "step": 4715, + "teacher_loss": 0.17661559581756592 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.10462985932826996, + "learning_rate": 2.0453954026311986e-05, + "loss": 0.1959, + "step": 4716, + "teacher_loss": 0.20600327849388123 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.7212705016136169, + "learning_rate": 2.0458291166690764e-05, + "loss": 0.4682, + "step": 4717, + "teacher_loss": 0.4401141107082367 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.6866236925125122, + "learning_rate": 2.046262830706954e-05, + "loss": 0.2658, + "step": 4718, + "teacher_loss": 0.21898984909057617 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.3656891882419586, + "learning_rate": 2.0466965447448315e-05, + "loss": 0.1876, + "step": 4719, + "teacher_loss": 0.16785411536693573 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.32728344202041626, + "learning_rate": 2.0471302587827093e-05, + "loss": 0.2362, + "step": 4720, + "teacher_loss": 0.22608011960983276 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.3760058283805847, + "learning_rate": 2.047563972820587e-05, + "loss": 0.2329, + "step": 4721, + "teacher_loss": 0.2169874608516693 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.5055915117263794, + "learning_rate": 2.047997686858465e-05, + "loss": 0.2606, + "step": 4722, + "teacher_loss": 0.23332643508911133 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.5275909900665283, + "learning_rate": 2.0484314008963423e-05, + "loss": 0.2928, + "step": 4723, + "teacher_loss": 0.2667027711868286 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.1965922713279724, + "learning_rate": 2.04886511493422e-05, + "loss": 0.1563, + "step": 4724, + "teacher_loss": 0.15185774862766266 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.48514658212661743, + "learning_rate": 2.0492988289720978e-05, + "loss": 0.272, + "step": 4725, + "teacher_loss": 0.24828682839870453 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.2736703157424927, + "learning_rate": 2.0497325430099755e-05, + "loss": 0.2071, + "step": 4726, + "teacher_loss": 0.1997268944978714 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.266363263130188, + "learning_rate": 2.0501662570478533e-05, + "loss": 0.212, + "step": 4727, + "teacher_loss": 0.20596735179424286 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.21976739168167114, + "learning_rate": 2.0505999710857307e-05, + "loss": 0.23, + "step": 4728, + "teacher_loss": 0.2310842126607895 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.304928719997406, + "learning_rate": 2.0510336851236085e-05, + "loss": 0.2285, + "step": 4729, + "teacher_loss": 0.2199726700782776 + }, + { + "compression_loss": 0.0, + "epoch": 0.85, + "label_loss": 0.3335579037666321, + "learning_rate": 2.0514673991614863e-05, + "loss": 0.3004, + "step": 4730, + "teacher_loss": 0.29673337936401367 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.10645858943462372, + "learning_rate": 2.051901113199364e-05, + "loss": 0.133, + "step": 4731, + "teacher_loss": 0.13590675592422485 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.5253108739852905, + "learning_rate": 2.0523348272372418e-05, + "loss": 0.2455, + "step": 4732, + "teacher_loss": 0.21445125341415405 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.1407293975353241, + "learning_rate": 2.0527685412751192e-05, + "loss": 0.123, + "step": 4733, + "teacher_loss": 0.12104855477809906 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.49479037523269653, + "learning_rate": 2.053202255312997e-05, + "loss": 0.2792, + "step": 4734, + "teacher_loss": 0.2552777826786041 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.3112625479698181, + "learning_rate": 2.0536359693508747e-05, + "loss": 0.2552, + "step": 4735, + "teacher_loss": 0.24896648526191711 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.21511104702949524, + "learning_rate": 2.0540696833887525e-05, + "loss": 0.2539, + "step": 4736, + "teacher_loss": 0.25825080275535583 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.18832336366176605, + "learning_rate": 2.05450339742663e-05, + "loss": 0.2253, + "step": 4737, + "teacher_loss": 0.22936120629310608 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.24616801738739014, + "learning_rate": 2.0549371114645077e-05, + "loss": 0.2645, + "step": 4738, + "teacher_loss": 0.2665729522705078 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.446675568819046, + "learning_rate": 2.0553708255023854e-05, + "loss": 0.2128, + "step": 4739, + "teacher_loss": 0.18684086203575134 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.33981913328170776, + "learning_rate": 2.0558045395402632e-05, + "loss": 0.3022, + "step": 4740, + "teacher_loss": 0.29804426431655884 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.609460711479187, + "learning_rate": 2.056238253578141e-05, + "loss": 0.2438, + "step": 4741, + "teacher_loss": 0.20321688055992126 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.5373461246490479, + "learning_rate": 2.0566719676160187e-05, + "loss": 0.2257, + "step": 4742, + "teacher_loss": 0.1911269724369049 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.34360814094543457, + "learning_rate": 2.0571056816538965e-05, + "loss": 0.195, + "step": 4743, + "teacher_loss": 0.1784539669752121 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.21057409048080444, + "learning_rate": 2.057539395691774e-05, + "loss": 0.2385, + "step": 4744, + "teacher_loss": 0.24161408841609955 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.923721432685852, + "learning_rate": 2.0579731097296513e-05, + "loss": 0.2734, + "step": 4745, + "teacher_loss": 0.20118948817253113 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.541483998298645, + "learning_rate": 2.058406823767529e-05, + "loss": 0.321, + "step": 4746, + "teacher_loss": 0.296536386013031 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.5715552568435669, + "learning_rate": 2.058840537805407e-05, + "loss": 0.2268, + "step": 4747, + "teacher_loss": 0.18853317201137543 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.40522849559783936, + "learning_rate": 2.0592742518432846e-05, + "loss": 0.2422, + "step": 4748, + "teacher_loss": 0.22406047582626343 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.4761125147342682, + "learning_rate": 2.0597079658811624e-05, + "loss": 0.3276, + "step": 4749, + "teacher_loss": 0.31105393171310425 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.3519165813922882, + "learning_rate": 2.06014167991904e-05, + "loss": 0.3553, + "step": 4750, + "teacher_loss": 0.3556811809539795 + }, + { + "epoch": 0.86, + "eval_exact_match": 79.62157048249763, + "eval_f1": 87.10371369303293, + "step": 4750 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.4493677616119385, + "learning_rate": 2.060575393956918e-05, + "loss": 0.2374, + "step": 4751, + "teacher_loss": 0.21380159258842468 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.3715413510799408, + "learning_rate": 2.0610091079947957e-05, + "loss": 0.2188, + "step": 4752, + "teacher_loss": 0.20183061063289642 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.22725386917591095, + "learning_rate": 2.0614428220326734e-05, + "loss": 0.2443, + "step": 4753, + "teacher_loss": 0.24614199995994568 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.6654298305511475, + "learning_rate": 2.061876536070551e-05, + "loss": 0.2793, + "step": 4754, + "teacher_loss": 0.2363489270210266 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.27652814984321594, + "learning_rate": 2.0623102501084283e-05, + "loss": 0.1607, + "step": 4755, + "teacher_loss": 0.14788006246089935 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.5477147102355957, + "learning_rate": 2.062743964146306e-05, + "loss": 0.2934, + "step": 4756, + "teacher_loss": 0.2651033103466034 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.7871124148368835, + "learning_rate": 2.0631776781841838e-05, + "loss": 0.2691, + "step": 4757, + "teacher_loss": 0.21153008937835693 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.26898670196533203, + "learning_rate": 2.0636113922220616e-05, + "loss": 0.1877, + "step": 4758, + "teacher_loss": 0.17865067720413208 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.8086433410644531, + "learning_rate": 2.0640451062599393e-05, + "loss": 0.371, + "step": 4759, + "teacher_loss": 0.32240110635757446 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.5675837397575378, + "learning_rate": 2.064478820297817e-05, + "loss": 0.228, + "step": 4760, + "teacher_loss": 0.19025897979736328 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.3095381259918213, + "learning_rate": 2.064912534335695e-05, + "loss": 0.2732, + "step": 4761, + "teacher_loss": 0.26919642090797424 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.4400290846824646, + "learning_rate": 2.0653462483735726e-05, + "loss": 0.3156, + "step": 4762, + "teacher_loss": 0.3018299639225006 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.3071986436843872, + "learning_rate": 2.06577996241145e-05, + "loss": 0.2976, + "step": 4763, + "teacher_loss": 0.29654982686042786 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.6105849146842957, + "learning_rate": 2.0662136764493278e-05, + "loss": 0.2293, + "step": 4764, + "teacher_loss": 0.18698062002658844 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.24157050251960754, + "learning_rate": 2.0666473904872056e-05, + "loss": 0.1932, + "step": 4765, + "teacher_loss": 0.1878001093864441 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.3392367362976074, + "learning_rate": 2.067081104525083e-05, + "loss": 0.1733, + "step": 4766, + "teacher_loss": 0.1549014449119568 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.40825098752975464, + "learning_rate": 2.0675148185629607e-05, + "loss": 0.2354, + "step": 4767, + "teacher_loss": 0.21617184579372406 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.08453512191772461, + "learning_rate": 2.0679485326008385e-05, + "loss": 0.1945, + "step": 4768, + "teacher_loss": 0.20673884451389313 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.5940424203872681, + "learning_rate": 2.0683822466387163e-05, + "loss": 0.2733, + "step": 4769, + "teacher_loss": 0.23763887584209442 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.4813592731952667, + "learning_rate": 2.068815960676594e-05, + "loss": 0.3155, + "step": 4770, + "teacher_loss": 0.2970581650733948 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.2245270013809204, + "learning_rate": 2.0692496747144718e-05, + "loss": 0.2075, + "step": 4771, + "teacher_loss": 0.20565257966518402 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.7164108157157898, + "learning_rate": 2.0696833887523492e-05, + "loss": 0.283, + "step": 4772, + "teacher_loss": 0.23480072617530823 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.2889282703399658, + "learning_rate": 2.070117102790227e-05, + "loss": 0.237, + "step": 4773, + "teacher_loss": 0.2312161773443222 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.38308480381965637, + "learning_rate": 2.0705508168281047e-05, + "loss": 0.2192, + "step": 4774, + "teacher_loss": 0.20097298920154572 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.5842920541763306, + "learning_rate": 2.0709845308659825e-05, + "loss": 0.2573, + "step": 4775, + "teacher_loss": 0.2209763079881668 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.4704876244068146, + "learning_rate": 2.0714182449038603e-05, + "loss": 0.2203, + "step": 4776, + "teacher_loss": 0.19244609773159027 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.2384336292743683, + "learning_rate": 2.0718519589417377e-05, + "loss": 0.1677, + "step": 4777, + "teacher_loss": 0.1598646491765976 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.5685132741928101, + "learning_rate": 2.0722856729796155e-05, + "loss": 0.25, + "step": 4778, + "teacher_loss": 0.2146632820367813 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.17542965710163116, + "learning_rate": 2.0727193870174932e-05, + "loss": 0.1818, + "step": 4779, + "teacher_loss": 0.18249854445457458 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.36382240056991577, + "learning_rate": 2.073153101055371e-05, + "loss": 0.2315, + "step": 4780, + "teacher_loss": 0.2167939394712448 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.36647355556488037, + "learning_rate": 2.0735868150932484e-05, + "loss": 0.2922, + "step": 4781, + "teacher_loss": 0.28393611311912537 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.4240023195743561, + "learning_rate": 2.074020529131126e-05, + "loss": 0.1608, + "step": 4782, + "teacher_loss": 0.13160736858844757 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.3196992874145508, + "learning_rate": 2.074454243169004e-05, + "loss": 0.2416, + "step": 4783, + "teacher_loss": 0.23295104503631592 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.5352063179016113, + "learning_rate": 2.0748879572068817e-05, + "loss": 0.2278, + "step": 4784, + "teacher_loss": 0.1936257779598236 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.6432743072509766, + "learning_rate": 2.0753216712447594e-05, + "loss": 0.3525, + "step": 4785, + "teacher_loss": 0.3202311396598816 + }, + { + "compression_loss": 0.0, + "epoch": 0.86, + "label_loss": 0.3428359031677246, + "learning_rate": 2.0757553852826372e-05, + "loss": 0.2875, + "step": 4786, + "teacher_loss": 0.28130775690078735 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.06807465851306915, + "learning_rate": 2.076189099320515e-05, + "loss": 0.1071, + "step": 4787, + "teacher_loss": 0.11140544712543488 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.4267340898513794, + "learning_rate": 2.0766228133583924e-05, + "loss": 0.334, + "step": 4788, + "teacher_loss": 0.32367902994155884 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.6794570088386536, + "learning_rate": 2.07705652739627e-05, + "loss": 0.335, + "step": 4789, + "teacher_loss": 0.29668188095092773 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.6524490118026733, + "learning_rate": 2.0774902414341476e-05, + "loss": 0.3174, + "step": 4790, + "teacher_loss": 0.2801551818847656 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.5418270230293274, + "learning_rate": 2.0779239554720253e-05, + "loss": 0.2498, + "step": 4791, + "teacher_loss": 0.2173733413219452 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.3127993047237396, + "learning_rate": 2.078357669509903e-05, + "loss": 0.2236, + "step": 4792, + "teacher_loss": 0.21372443437576294 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.36337536573410034, + "learning_rate": 2.078791383547781e-05, + "loss": 0.2492, + "step": 4793, + "teacher_loss": 0.23648947477340698 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.3192163109779358, + "learning_rate": 2.0792250975856586e-05, + "loss": 0.2098, + "step": 4794, + "teacher_loss": 0.1975913941860199 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.44483301043510437, + "learning_rate": 2.0796588116235364e-05, + "loss": 0.2089, + "step": 4795, + "teacher_loss": 0.18273496627807617 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.35255932807922363, + "learning_rate": 2.080092525661414e-05, + "loss": 0.2944, + "step": 4796, + "teacher_loss": 0.28792160749435425 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.4680088460445404, + "learning_rate": 2.080526239699292e-05, + "loss": 0.2357, + "step": 4797, + "teacher_loss": 0.20989438891410828 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.3696249723434448, + "learning_rate": 2.0809599537371693e-05, + "loss": 0.2488, + "step": 4798, + "teacher_loss": 0.23542523384094238 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.6639704704284668, + "learning_rate": 2.0813936677750468e-05, + "loss": 0.3229, + "step": 4799, + "teacher_loss": 0.2850167751312256 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.6983274817466736, + "learning_rate": 2.0818273818129245e-05, + "loss": 0.299, + "step": 4800, + "teacher_loss": 0.25460439920425415 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.4615797996520996, + "learning_rate": 2.0822610958508023e-05, + "loss": 0.2763, + "step": 4801, + "teacher_loss": 0.2557072639465332 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.31135794520378113, + "learning_rate": 2.08269480988868e-05, + "loss": 0.2069, + "step": 4802, + "teacher_loss": 0.19526076316833496 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.7562644481658936, + "learning_rate": 2.0831285239265578e-05, + "loss": 0.2376, + "step": 4803, + "teacher_loss": 0.17996902763843536 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.12007440626621246, + "learning_rate": 2.0835622379644356e-05, + "loss": 0.218, + "step": 4804, + "teacher_loss": 0.2288973182439804 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.5890671014785767, + "learning_rate": 2.0839959520023133e-05, + "loss": 0.2444, + "step": 4805, + "teacher_loss": 0.20609742403030396 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.30036479234695435, + "learning_rate": 2.084429666040191e-05, + "loss": 0.1994, + "step": 4806, + "teacher_loss": 0.18820902705192566 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.47405874729156494, + "learning_rate": 2.0848633800780685e-05, + "loss": 0.2548, + "step": 4807, + "teacher_loss": 0.23042921721935272 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.4083493947982788, + "learning_rate": 2.0852970941159463e-05, + "loss": 0.2213, + "step": 4808, + "teacher_loss": 0.20048563182353973 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.34885621070861816, + "learning_rate": 2.085730808153824e-05, + "loss": 0.2864, + "step": 4809, + "teacher_loss": 0.27946630120277405 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.2252580225467682, + "learning_rate": 2.0861645221917015e-05, + "loss": 0.1794, + "step": 4810, + "teacher_loss": 0.17431147396564484 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.4814949631690979, + "learning_rate": 2.0865982362295792e-05, + "loss": 0.2516, + "step": 4811, + "teacher_loss": 0.22600892186164856 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.518089771270752, + "learning_rate": 2.087031950267457e-05, + "loss": 0.4367, + "step": 4812, + "teacher_loss": 0.4276808500289917 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.20721884071826935, + "learning_rate": 2.0874656643053348e-05, + "loss": 0.1723, + "step": 4813, + "teacher_loss": 0.16841307282447815 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.4128264784812927, + "learning_rate": 2.0878993783432125e-05, + "loss": 0.2428, + "step": 4814, + "teacher_loss": 0.22395160794258118 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.3508679270744324, + "learning_rate": 2.0883330923810903e-05, + "loss": 0.156, + "step": 4815, + "teacher_loss": 0.13437426090240479 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.3383867144584656, + "learning_rate": 2.0887668064189677e-05, + "loss": 0.2763, + "step": 4816, + "teacher_loss": 0.2693929672241211 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.8833049535751343, + "learning_rate": 2.0892005204568455e-05, + "loss": 0.2856, + "step": 4817, + "teacher_loss": 0.2191411554813385 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.499011754989624, + "learning_rate": 2.0896342344947232e-05, + "loss": 0.2837, + "step": 4818, + "teacher_loss": 0.25977832078933716 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.1661023199558258, + "learning_rate": 2.090067948532601e-05, + "loss": 0.1895, + "step": 4819, + "teacher_loss": 0.19215211272239685 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.35957083106040955, + "learning_rate": 2.0905016625704788e-05, + "loss": 0.2111, + "step": 4820, + "teacher_loss": 0.1946393847465515 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.9085617661476135, + "learning_rate": 2.0909353766083562e-05, + "loss": 0.3808, + "step": 4821, + "teacher_loss": 0.32211828231811523 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.8703345060348511, + "learning_rate": 2.091369090646234e-05, + "loss": 0.2934, + "step": 4822, + "teacher_loss": 0.22929087281227112 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.44223934412002563, + "learning_rate": 2.0918028046841117e-05, + "loss": 0.2689, + "step": 4823, + "teacher_loss": 0.24968111515045166 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.5612695217132568, + "learning_rate": 2.0922365187219895e-05, + "loss": 0.3717, + "step": 4824, + "teacher_loss": 0.3506481647491455 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.46431466937065125, + "learning_rate": 2.092670232759867e-05, + "loss": 0.3103, + "step": 4825, + "teacher_loss": 0.293241024017334 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.6635532975196838, + "learning_rate": 2.0931039467977447e-05, + "loss": 0.2327, + "step": 4826, + "teacher_loss": 0.1848260760307312 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.6287559866905212, + "learning_rate": 2.0935376608356224e-05, + "loss": 0.3335, + "step": 4827, + "teacher_loss": 0.3006608486175537 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.4950222373008728, + "learning_rate": 2.0939713748735002e-05, + "loss": 0.2394, + "step": 4828, + "teacher_loss": 0.21098008751869202 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.5812622308731079, + "learning_rate": 2.094405088911378e-05, + "loss": 0.2256, + "step": 4829, + "teacher_loss": 0.18605396151542664 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.6748309135437012, + "learning_rate": 2.0948388029492557e-05, + "loss": 0.621, + "step": 4830, + "teacher_loss": 0.6150619387626648 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 1.085985541343689, + "learning_rate": 2.095272516987133e-05, + "loss": 0.3593, + "step": 4831, + "teacher_loss": 0.27858567237854004 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.8553024530410767, + "learning_rate": 2.095706231025011e-05, + "loss": 0.5284, + "step": 4832, + "teacher_loss": 0.4920288920402527 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.3057997226715088, + "learning_rate": 2.0961399450628886e-05, + "loss": 0.2111, + "step": 4833, + "teacher_loss": 0.20057693123817444 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.3562811017036438, + "learning_rate": 2.096573659100766e-05, + "loss": 0.2201, + "step": 4834, + "teacher_loss": 0.20495307445526123 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.7519885301589966, + "learning_rate": 2.097007373138644e-05, + "loss": 0.3094, + "step": 4835, + "teacher_loss": 0.2602734863758087 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.6169558763504028, + "learning_rate": 2.0974410871765216e-05, + "loss": 0.2239, + "step": 4836, + "teacher_loss": 0.18018747866153717 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.3238542377948761, + "learning_rate": 2.0978748012143994e-05, + "loss": 0.248, + "step": 4837, + "teacher_loss": 0.2396085262298584 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.3412906229496002, + "learning_rate": 2.098308515252277e-05, + "loss": 0.2161, + "step": 4838, + "teacher_loss": 0.2022000253200531 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.5752498507499695, + "learning_rate": 2.098742229290155e-05, + "loss": 0.2153, + "step": 4839, + "teacher_loss": 0.17532062530517578 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.24048426747322083, + "learning_rate": 2.0991759433280326e-05, + "loss": 0.1937, + "step": 4840, + "teacher_loss": 0.1885371208190918 + }, + { + "compression_loss": 0.0, + "epoch": 0.87, + "label_loss": 0.927212119102478, + "learning_rate": 2.0996096573659104e-05, + "loss": 0.2854, + "step": 4841, + "teacher_loss": 0.21404683589935303 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.23985639214515686, + "learning_rate": 2.1000433714037875e-05, + "loss": 0.2428, + "step": 4842, + "teacher_loss": 0.2430967390537262 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.3295680284500122, + "learning_rate": 2.1004770854416653e-05, + "loss": 0.1837, + "step": 4843, + "teacher_loss": 0.16749054193496704 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.38000011444091797, + "learning_rate": 2.100910799479543e-05, + "loss": 0.3014, + "step": 4844, + "teacher_loss": 0.2926191985607147 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.2840404808521271, + "learning_rate": 2.1013445135174208e-05, + "loss": 0.256, + "step": 4845, + "teacher_loss": 0.2528376877307892 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.5129337906837463, + "learning_rate": 2.1017782275552985e-05, + "loss": 0.2186, + "step": 4846, + "teacher_loss": 0.18590568006038666 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.8131666779518127, + "learning_rate": 2.1022119415931763e-05, + "loss": 0.2982, + "step": 4847, + "teacher_loss": 0.24097684025764465 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.4696654677391052, + "learning_rate": 2.102645655631054e-05, + "loss": 0.2471, + "step": 4848, + "teacher_loss": 0.2224232703447342 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.6193064451217651, + "learning_rate": 2.1030793696689318e-05, + "loss": 0.266, + "step": 4849, + "teacher_loss": 0.22679319977760315 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.4176526665687561, + "learning_rate": 2.1035130837068096e-05, + "loss": 0.2032, + "step": 4850, + "teacher_loss": 0.17933514714241028 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.9364557266235352, + "learning_rate": 2.103946797744687e-05, + "loss": 0.408, + "step": 4851, + "teacher_loss": 0.3493291139602661 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.27834367752075195, + "learning_rate": 2.1043805117825648e-05, + "loss": 0.2334, + "step": 4852, + "teacher_loss": 0.22836343944072723 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.2658689022064209, + "learning_rate": 2.1048142258204422e-05, + "loss": 0.2473, + "step": 4853, + "teacher_loss": 0.24521687626838684 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.18103188276290894, + "learning_rate": 2.10524793985832e-05, + "loss": 0.1583, + "step": 4854, + "teacher_loss": 0.15573669970035553 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.40230584144592285, + "learning_rate": 2.1056816538961977e-05, + "loss": 0.4206, + "step": 4855, + "teacher_loss": 0.42267870903015137 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.6293624043464661, + "learning_rate": 2.1061153679340755e-05, + "loss": 0.2848, + "step": 4856, + "teacher_loss": 0.24655066430568695 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.5312010049819946, + "learning_rate": 2.1065490819719532e-05, + "loss": 0.2648, + "step": 4857, + "teacher_loss": 0.23525011539459229 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.3443912863731384, + "learning_rate": 2.106982796009831e-05, + "loss": 0.3098, + "step": 4858, + "teacher_loss": 0.3059839606285095 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.4562992453575134, + "learning_rate": 2.1074165100477088e-05, + "loss": 0.2453, + "step": 4859, + "teacher_loss": 0.2218371033668518 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.3486790955066681, + "learning_rate": 2.1078502240855862e-05, + "loss": 0.1678, + "step": 4860, + "teacher_loss": 0.14771874248981476 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.5641835927963257, + "learning_rate": 2.108283938123464e-05, + "loss": 0.3621, + "step": 4861, + "teacher_loss": 0.33961790800094604 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 1.0885891914367676, + "learning_rate": 2.1087176521613417e-05, + "loss": 0.3606, + "step": 4862, + "teacher_loss": 0.2797633707523346 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.8079729080200195, + "learning_rate": 2.1091513661992195e-05, + "loss": 0.3713, + "step": 4863, + "teacher_loss": 0.32277625799179077 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.38171976804733276, + "learning_rate": 2.109585080237097e-05, + "loss": 0.2445, + "step": 4864, + "teacher_loss": 0.2292403131723404 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.5673741698265076, + "learning_rate": 2.1100187942749747e-05, + "loss": 0.3102, + "step": 4865, + "teacher_loss": 0.28162863850593567 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.49253854155540466, + "learning_rate": 2.1104525083128524e-05, + "loss": 0.2887, + "step": 4866, + "teacher_loss": 0.2660059332847595 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.2558417320251465, + "learning_rate": 2.1108862223507302e-05, + "loss": 0.1799, + "step": 4867, + "teacher_loss": 0.17151561379432678 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.3826592266559601, + "learning_rate": 2.111319936388608e-05, + "loss": 0.2761, + "step": 4868, + "teacher_loss": 0.2642658054828644 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.32508817315101624, + "learning_rate": 2.1117536504264854e-05, + "loss": 0.2909, + "step": 4869, + "teacher_loss": 0.2871348261833191 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 1.0501303672790527, + "learning_rate": 2.112187364464363e-05, + "loss": 0.3836, + "step": 4870, + "teacher_loss": 0.30953431129455566 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.6292088031768799, + "learning_rate": 2.112621078502241e-05, + "loss": 0.2427, + "step": 4871, + "teacher_loss": 0.19978384673595428 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.40905478596687317, + "learning_rate": 2.1130547925401187e-05, + "loss": 0.2007, + "step": 4872, + "teacher_loss": 0.17753881216049194 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.2770938575267792, + "learning_rate": 2.1134885065779964e-05, + "loss": 0.2087, + "step": 4873, + "teacher_loss": 0.20105795562267303 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.3100948929786682, + "learning_rate": 2.1139222206158742e-05, + "loss": 0.336, + "step": 4874, + "teacher_loss": 0.33886635303497314 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.49566251039505005, + "learning_rate": 2.1143559346537516e-05, + "loss": 0.2984, + "step": 4875, + "teacher_loss": 0.2764957845211029 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.25857824087142944, + "learning_rate": 2.1147896486916294e-05, + "loss": 0.2375, + "step": 4876, + "teacher_loss": 0.2351183295249939 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.4528854489326477, + "learning_rate": 2.115223362729507e-05, + "loss": 0.2461, + "step": 4877, + "teacher_loss": 0.22313690185546875 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.2965570092201233, + "learning_rate": 2.1156570767673846e-05, + "loss": 0.171, + "step": 4878, + "teacher_loss": 0.1570700705051422 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.191180020570755, + "learning_rate": 2.1160907908052623e-05, + "loss": 0.1832, + "step": 4879, + "teacher_loss": 0.1823207288980484 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.5405227541923523, + "learning_rate": 2.11652450484314e-05, + "loss": 0.2765, + "step": 4880, + "teacher_loss": 0.24716366827487946 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.31240585446357727, + "learning_rate": 2.116958218881018e-05, + "loss": 0.2318, + "step": 4881, + "teacher_loss": 0.22281807661056519 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.48004692792892456, + "learning_rate": 2.1173919329188956e-05, + "loss": 0.2408, + "step": 4882, + "teacher_loss": 0.21426282823085785 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.1573874056339264, + "learning_rate": 2.1178256469567734e-05, + "loss": 0.1676, + "step": 4883, + "teacher_loss": 0.16872447729110718 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.3565481901168823, + "learning_rate": 2.118259360994651e-05, + "loss": 0.2345, + "step": 4884, + "teacher_loss": 0.2209150791168213 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.6600135564804077, + "learning_rate": 2.118693075032529e-05, + "loss": 0.4031, + "step": 4885, + "teacher_loss": 0.37453049421310425 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.7931019067764282, + "learning_rate": 2.119126789070406e-05, + "loss": 0.5347, + "step": 4886, + "teacher_loss": 0.5059648156166077 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.5532708764076233, + "learning_rate": 2.1195605031082837e-05, + "loss": 0.2624, + "step": 4887, + "teacher_loss": 0.23012381792068481 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.6887480020523071, + "learning_rate": 2.1199942171461615e-05, + "loss": 0.2144, + "step": 4888, + "teacher_loss": 0.16169053316116333 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.24550040066242218, + "learning_rate": 2.1204279311840393e-05, + "loss": 0.2036, + "step": 4889, + "teacher_loss": 0.19889873266220093 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.2875514626502991, + "learning_rate": 2.120861645221917e-05, + "loss": 0.1899, + "step": 4890, + "teacher_loss": 0.17899571359157562 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.1795966625213623, + "learning_rate": 2.1212953592597948e-05, + "loss": 0.1958, + "step": 4891, + "teacher_loss": 0.1976279616355896 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.3074508607387543, + "learning_rate": 2.1217290732976726e-05, + "loss": 0.2475, + "step": 4892, + "teacher_loss": 0.24087673425674438 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.46810466051101685, + "learning_rate": 2.1221627873355503e-05, + "loss": 0.2432, + "step": 4893, + "teacher_loss": 0.2181912213563919 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.5411608219146729, + "learning_rate": 2.122596501373428e-05, + "loss": 0.198, + "step": 4894, + "teacher_loss": 0.15983399748802185 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.5552703142166138, + "learning_rate": 2.1230302154113055e-05, + "loss": 0.2345, + "step": 4895, + "teacher_loss": 0.198857843875885 + }, + { + "compression_loss": 0.0, + "epoch": 0.88, + "label_loss": 0.10408133268356323, + "learning_rate": 2.1234639294491833e-05, + "loss": 0.1328, + "step": 4896, + "teacher_loss": 0.13598322868347168 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.5159024000167847, + "learning_rate": 2.1238976434870607e-05, + "loss": 0.3307, + "step": 4897, + "teacher_loss": 0.3101428747177124 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.46688953042030334, + "learning_rate": 2.1243313575249384e-05, + "loss": 0.1939, + "step": 4898, + "teacher_loss": 0.16351687908172607 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.3184567391872406, + "learning_rate": 2.1247650715628162e-05, + "loss": 0.2728, + "step": 4899, + "teacher_loss": 0.2676974833011627 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.5943064093589783, + "learning_rate": 2.125198785600694e-05, + "loss": 0.299, + "step": 4900, + "teacher_loss": 0.26621532440185547 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 1.1503369808197021, + "learning_rate": 2.1256324996385717e-05, + "loss": 0.3046, + "step": 4901, + "teacher_loss": 0.2106659710407257 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.4666033983230591, + "learning_rate": 2.1260662136764495e-05, + "loss": 0.2345, + "step": 4902, + "teacher_loss": 0.20870742201805115 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.9410369992256165, + "learning_rate": 2.1264999277143273e-05, + "loss": 0.3013, + "step": 4903, + "teacher_loss": 0.23018693923950195 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.2715277075767517, + "learning_rate": 2.1269336417522047e-05, + "loss": 0.1836, + "step": 4904, + "teacher_loss": 0.17387613654136658 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.5608832836151123, + "learning_rate": 2.1273673557900824e-05, + "loss": 0.2896, + "step": 4905, + "teacher_loss": 0.2594859004020691 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.365366131067276, + "learning_rate": 2.1278010698279602e-05, + "loss": 0.285, + "step": 4906, + "teacher_loss": 0.2760217487812042 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.4786378741264343, + "learning_rate": 2.128234783865838e-05, + "loss": 0.2506, + "step": 4907, + "teacher_loss": 0.22523358464241028 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.7722607851028442, + "learning_rate": 2.1286684979037154e-05, + "loss": 0.309, + "step": 4908, + "teacher_loss": 0.2575114965438843 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.49076831340789795, + "learning_rate": 2.129102211941593e-05, + "loss": 0.2741, + "step": 4909, + "teacher_loss": 0.25000888109207153 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.5835728645324707, + "learning_rate": 2.129535925979471e-05, + "loss": 0.668, + "step": 4910, + "teacher_loss": 0.677399754524231 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.20781002938747406, + "learning_rate": 2.1299696400173487e-05, + "loss": 0.2223, + "step": 4911, + "teacher_loss": 0.22389043867588043 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.6984086632728577, + "learning_rate": 2.1304033540552264e-05, + "loss": 0.3971, + "step": 4912, + "teacher_loss": 0.36366546154022217 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.5680651664733887, + "learning_rate": 2.130837068093104e-05, + "loss": 0.2798, + "step": 4913, + "teacher_loss": 0.24780185520648956 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.4330735206604004, + "learning_rate": 2.1312707821309816e-05, + "loss": 0.3389, + "step": 4914, + "teacher_loss": 0.328416109085083 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.6274861097335815, + "learning_rate": 2.1317044961688594e-05, + "loss": 0.2524, + "step": 4915, + "teacher_loss": 0.21071599423885345 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.3112795650959015, + "learning_rate": 2.132138210206737e-05, + "loss": 0.2003, + "step": 4916, + "teacher_loss": 0.1880241483449936 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.24191522598266602, + "learning_rate": 2.132571924244615e-05, + "loss": 0.234, + "step": 4917, + "teacher_loss": 0.23306824266910553 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.18727698922157288, + "learning_rate": 2.1330056382824927e-05, + "loss": 0.2634, + "step": 4918, + "teacher_loss": 0.27183055877685547 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.6641616225242615, + "learning_rate": 2.13343935232037e-05, + "loss": 0.2917, + "step": 4919, + "teacher_loss": 0.2503282427787781 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.774112343788147, + "learning_rate": 2.133873066358248e-05, + "loss": 0.2973, + "step": 4920, + "teacher_loss": 0.24432498216629028 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.850703775882721, + "learning_rate": 2.1343067803961256e-05, + "loss": 0.353, + "step": 4921, + "teacher_loss": 0.2977176904678345 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.5981116890907288, + "learning_rate": 2.134740494434003e-05, + "loss": 0.3596, + "step": 4922, + "teacher_loss": 0.33310291171073914 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.3872612714767456, + "learning_rate": 2.1351742084718808e-05, + "loss": 0.2092, + "step": 4923, + "teacher_loss": 0.18944603204727173 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.728947103023529, + "learning_rate": 2.1356079225097586e-05, + "loss": 0.3065, + "step": 4924, + "teacher_loss": 0.2596026062965393 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.8113147616386414, + "learning_rate": 2.1360416365476363e-05, + "loss": 0.3307, + "step": 4925, + "teacher_loss": 0.27726688981056213 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.44602975249290466, + "learning_rate": 2.136475350585514e-05, + "loss": 0.3185, + "step": 4926, + "teacher_loss": 0.30436283349990845 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.7215862274169922, + "learning_rate": 2.136909064623392e-05, + "loss": 0.3181, + "step": 4927, + "teacher_loss": 0.27322137355804443 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.4445520043373108, + "learning_rate": 2.1373427786612696e-05, + "loss": 0.2003, + "step": 4928, + "teacher_loss": 0.17318299412727356 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.4289535582065582, + "learning_rate": 2.137776492699147e-05, + "loss": 0.2952, + "step": 4929, + "teacher_loss": 0.28028473258018494 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.7420486211776733, + "learning_rate": 2.1382102067370248e-05, + "loss": 0.2525, + "step": 4930, + "teacher_loss": 0.19805431365966797 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.45578306913375854, + "learning_rate": 2.1386439207749022e-05, + "loss": 0.3078, + "step": 4931, + "teacher_loss": 0.2913256883621216 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.28732916712760925, + "learning_rate": 2.13907763481278e-05, + "loss": 0.2221, + "step": 4932, + "teacher_loss": 0.21480277180671692 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.19382552802562714, + "learning_rate": 2.1395113488506578e-05, + "loss": 0.1668, + "step": 4933, + "teacher_loss": 0.16377496719360352 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.3187437057495117, + "learning_rate": 2.1399450628885355e-05, + "loss": 0.1955, + "step": 4934, + "teacher_loss": 0.18182289600372314 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.4643344283103943, + "learning_rate": 2.1403787769264133e-05, + "loss": 0.2693, + "step": 4935, + "teacher_loss": 0.24765314161777496 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.5880998969078064, + "learning_rate": 2.140812490964291e-05, + "loss": 0.2622, + "step": 4936, + "teacher_loss": 0.2259451448917389 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.2634769082069397, + "learning_rate": 2.1412462050021688e-05, + "loss": 0.1845, + "step": 4937, + "teacher_loss": 0.17569279670715332 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.6411460638046265, + "learning_rate": 2.1416799190400466e-05, + "loss": 0.2442, + "step": 4938, + "teacher_loss": 0.20011621713638306 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.7642667889595032, + "learning_rate": 2.142113633077924e-05, + "loss": 0.4246, + "step": 4939, + "teacher_loss": 0.3868919312953949 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.359567254781723, + "learning_rate": 2.1425473471158017e-05, + "loss": 0.2789, + "step": 4940, + "teacher_loss": 0.2699160575866699 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.7302291393280029, + "learning_rate": 2.1429810611536792e-05, + "loss": 0.2627, + "step": 4941, + "teacher_loss": 0.2107389122247696 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.36183881759643555, + "learning_rate": 2.143414775191557e-05, + "loss": 0.2123, + "step": 4942, + "teacher_loss": 0.19568568468093872 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.5417706966400146, + "learning_rate": 2.1438484892294347e-05, + "loss": 0.2218, + "step": 4943, + "teacher_loss": 0.18627247214317322 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.4372347891330719, + "learning_rate": 2.1442822032673125e-05, + "loss": 0.3483, + "step": 4944, + "teacher_loss": 0.33838292956352234 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.3364478647708893, + "learning_rate": 2.1447159173051902e-05, + "loss": 0.2099, + "step": 4945, + "teacher_loss": 0.19580666720867157 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.2590668201446533, + "learning_rate": 2.145149631343068e-05, + "loss": 0.2195, + "step": 4946, + "teacher_loss": 0.21508730947971344 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.44933491945266724, + "learning_rate": 2.1455833453809457e-05, + "loss": 0.3378, + "step": 4947, + "teacher_loss": 0.32538288831710815 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.49010169506073, + "learning_rate": 2.1460170594188232e-05, + "loss": 0.3009, + "step": 4948, + "teacher_loss": 0.2798406183719635 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.6871552467346191, + "learning_rate": 2.146450773456701e-05, + "loss": 0.3553, + "step": 4949, + "teacher_loss": 0.3184507489204407 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.22841525077819824, + "learning_rate": 2.1468844874945787e-05, + "loss": 0.1431, + "step": 4950, + "teacher_loss": 0.13367322087287903 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.5973429083824158, + "learning_rate": 2.147318201532456e-05, + "loss": 0.2642, + "step": 4951, + "teacher_loss": 0.2272353619337082 + }, + { + "compression_loss": 0.0, + "epoch": 0.89, + "label_loss": 0.32793813943862915, + "learning_rate": 2.147751915570334e-05, + "loss": 0.2603, + "step": 4952, + "teacher_loss": 0.2528391182422638 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.4855886697769165, + "learning_rate": 2.1481856296082116e-05, + "loss": 0.1927, + "step": 4953, + "teacher_loss": 0.16010509431362152 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.8270766735076904, + "learning_rate": 2.1486193436460894e-05, + "loss": 0.4078, + "step": 4954, + "teacher_loss": 0.3612019121646881 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.5824657678604126, + "learning_rate": 2.149053057683967e-05, + "loss": 0.3259, + "step": 4955, + "teacher_loss": 0.2974180579185486 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.6485385894775391, + "learning_rate": 2.149486771721845e-05, + "loss": 0.28, + "step": 4956, + "teacher_loss": 0.23900456726551056 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.5515516996383667, + "learning_rate": 2.1499204857597224e-05, + "loss": 0.2734, + "step": 4957, + "teacher_loss": 0.2424619495868683 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.47967639565467834, + "learning_rate": 2.1503541997976e-05, + "loss": 0.3044, + "step": 4958, + "teacher_loss": 0.28497135639190674 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.490291029214859, + "learning_rate": 2.150787913835478e-05, + "loss": 0.227, + "step": 4959, + "teacher_loss": 0.19769850373268127 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.523702085018158, + "learning_rate": 2.1512216278733556e-05, + "loss": 0.295, + "step": 4960, + "teacher_loss": 0.26961225271224976 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.8569098711013794, + "learning_rate": 2.1516553419112334e-05, + "loss": 0.5047, + "step": 4961, + "teacher_loss": 0.465536892414093 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.48284712433815, + "learning_rate": 2.1520890559491108e-05, + "loss": 0.2579, + "step": 4962, + "teacher_loss": 0.23287333548069 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.35147833824157715, + "learning_rate": 2.1525227699869886e-05, + "loss": 0.2183, + "step": 4963, + "teacher_loss": 0.20348471403121948 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.534899115562439, + "learning_rate": 2.1529564840248663e-05, + "loss": 0.246, + "step": 4964, + "teacher_loss": 0.21386224031448364 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.8693801760673523, + "learning_rate": 2.153390198062744e-05, + "loss": 0.2898, + "step": 4965, + "teacher_loss": 0.2253713309764862 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.8280802965164185, + "learning_rate": 2.1538239121006215e-05, + "loss": 0.2671, + "step": 4966, + "teacher_loss": 0.20475471019744873 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.5361369252204895, + "learning_rate": 2.1542576261384993e-05, + "loss": 0.3386, + "step": 4967, + "teacher_loss": 0.3166942596435547 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.2746128439903259, + "learning_rate": 2.154691340176377e-05, + "loss": 0.2124, + "step": 4968, + "teacher_loss": 0.20548345148563385 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.8689091801643372, + "learning_rate": 2.1551250542142548e-05, + "loss": 0.3106, + "step": 4969, + "teacher_loss": 0.24857546389102936 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.23524591326713562, + "learning_rate": 2.1555587682521326e-05, + "loss": 0.2069, + "step": 4970, + "teacher_loss": 0.20377680659294128 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.5544571280479431, + "learning_rate": 2.1559924822900103e-05, + "loss": 0.2604, + "step": 4971, + "teacher_loss": 0.22770847380161285 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.4059109389781952, + "learning_rate": 2.156426196327888e-05, + "loss": 0.2912, + "step": 4972, + "teacher_loss": 0.27842846512794495 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.38329017162323, + "learning_rate": 2.1568599103657655e-05, + "loss": 0.2156, + "step": 4973, + "teacher_loss": 0.19700732827186584 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.3662585914134979, + "learning_rate": 2.1572936244036433e-05, + "loss": 0.1804, + "step": 4974, + "teacher_loss": 0.15976077318191528 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.06622756272554398, + "learning_rate": 2.1577273384415207e-05, + "loss": 0.1589, + "step": 4975, + "teacher_loss": 0.16914281249046326 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.361545205116272, + "learning_rate": 2.1581610524793985e-05, + "loss": 0.2637, + "step": 4976, + "teacher_loss": 0.2528447210788727 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.34256407618522644, + "learning_rate": 2.1585947665172762e-05, + "loss": 0.2355, + "step": 4977, + "teacher_loss": 0.2235802412033081 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.4668552875518799, + "learning_rate": 2.159028480555154e-05, + "loss": 0.162, + "step": 4978, + "teacher_loss": 0.12809503078460693 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.46656686067581177, + "learning_rate": 2.1594621945930318e-05, + "loss": 0.2704, + "step": 4979, + "teacher_loss": 0.24855628609657288 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.2090367078781128, + "learning_rate": 2.1598959086309095e-05, + "loss": 0.1917, + "step": 4980, + "teacher_loss": 0.1898198127746582 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.3238547146320343, + "learning_rate": 2.1603296226687873e-05, + "loss": 0.3156, + "step": 4981, + "teacher_loss": 0.314701110124588 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.6774986982345581, + "learning_rate": 2.160763336706665e-05, + "loss": 0.3289, + "step": 4982, + "teacher_loss": 0.2901648283004761 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.8897311687469482, + "learning_rate": 2.1611970507445425e-05, + "loss": 0.3759, + "step": 4983, + "teacher_loss": 0.31875723600387573 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.9408584833145142, + "learning_rate": 2.16163076478242e-05, + "loss": 0.416, + "step": 4984, + "teacher_loss": 0.35763445496559143 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.33588656783103943, + "learning_rate": 2.1620644788202977e-05, + "loss": 0.2356, + "step": 4985, + "teacher_loss": 0.224424809217453 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.2043946385383606, + "learning_rate": 2.1624981928581754e-05, + "loss": 0.1916, + "step": 4986, + "teacher_loss": 0.190158873796463 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.6215960383415222, + "learning_rate": 2.1629319068960532e-05, + "loss": 0.2927, + "step": 4987, + "teacher_loss": 0.25620540976524353 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.2454887330532074, + "learning_rate": 2.163365620933931e-05, + "loss": 0.2377, + "step": 4988, + "teacher_loss": 0.23683345317840576 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.37675097584724426, + "learning_rate": 2.1637993349718087e-05, + "loss": 0.3096, + "step": 4989, + "teacher_loss": 0.3021875023841858 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.930221676826477, + "learning_rate": 2.1642330490096865e-05, + "loss": 0.7065, + "step": 4990, + "teacher_loss": 0.681634247303009 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.2444666028022766, + "learning_rate": 2.1646667630475642e-05, + "loss": 0.2384, + "step": 4991, + "teacher_loss": 0.23774632811546326 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.5182797908782959, + "learning_rate": 2.1651004770854417e-05, + "loss": 0.2416, + "step": 4992, + "teacher_loss": 0.2108837515115738 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.25727009773254395, + "learning_rate": 2.1655341911233194e-05, + "loss": 0.1939, + "step": 4993, + "teacher_loss": 0.18687215447425842 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.4268108010292053, + "learning_rate": 2.1659679051611972e-05, + "loss": 0.1865, + "step": 4994, + "teacher_loss": 0.15983860194683075 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.7278549671173096, + "learning_rate": 2.1664016191990746e-05, + "loss": 0.3292, + "step": 4995, + "teacher_loss": 0.28487664461135864 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.6536684036254883, + "learning_rate": 2.1668353332369524e-05, + "loss": 0.2593, + "step": 4996, + "teacher_loss": 0.215532124042511 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.2759597599506378, + "learning_rate": 2.16726904727483e-05, + "loss": 0.1823, + "step": 4997, + "teacher_loss": 0.17187872529029846 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.4096769690513611, + "learning_rate": 2.167702761312708e-05, + "loss": 0.2555, + "step": 4998, + "teacher_loss": 0.23835709691047668 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.48129016160964966, + "learning_rate": 2.1681364753505857e-05, + "loss": 0.2616, + "step": 4999, + "teacher_loss": 0.23719972372055054 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.5433094501495361, + "learning_rate": 2.1685701893884634e-05, + "loss": 0.2884, + "step": 5000, + "teacher_loss": 0.2601229250431061 + }, + { + "epoch": 0.9, + "eval_exact_match": 79.94323557237465, + "eval_f1": 87.38469094119894, + "step": 5000 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.3355875015258789, + "learning_rate": 2.169003903426341e-05, + "loss": 0.1989, + "step": 5001, + "teacher_loss": 0.18374839425086975 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.4448932409286499, + "learning_rate": 2.1694376174642186e-05, + "loss": 0.2127, + "step": 5002, + "teacher_loss": 0.18689042329788208 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.40050047636032104, + "learning_rate": 2.1698713315020964e-05, + "loss": 0.2233, + "step": 5003, + "teacher_loss": 0.2036624550819397 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.347493052482605, + "learning_rate": 2.170305045539974e-05, + "loss": 0.2596, + "step": 5004, + "teacher_loss": 0.2498435527086258 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.39709848165512085, + "learning_rate": 2.170738759577852e-05, + "loss": 0.286, + "step": 5005, + "teacher_loss": 0.27360135316848755 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.7585475444793701, + "learning_rate": 2.1711724736157293e-05, + "loss": 0.3192, + "step": 5006, + "teacher_loss": 0.2704097330570221 + }, + { + "compression_loss": 0.0, + "epoch": 0.9, + "label_loss": 0.5988269448280334, + "learning_rate": 2.171606187653607e-05, + "loss": 0.2293, + "step": 5007, + "teacher_loss": 0.1881895363330841 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.602444589138031, + "learning_rate": 2.172039901691485e-05, + "loss": 0.2354, + "step": 5008, + "teacher_loss": 0.19463714957237244 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.32646477222442627, + "learning_rate": 2.1724736157293626e-05, + "loss": 0.2405, + "step": 5009, + "teacher_loss": 0.23095834255218506 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.2694118916988373, + "learning_rate": 2.17290732976724e-05, + "loss": 0.192, + "step": 5010, + "teacher_loss": 0.18341705203056335 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.8982809782028198, + "learning_rate": 2.1733410438051178e-05, + "loss": 0.2493, + "step": 5011, + "teacher_loss": 0.1772342324256897 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.3746625781059265, + "learning_rate": 2.1737747578429955e-05, + "loss": 0.243, + "step": 5012, + "teacher_loss": 0.2283729612827301 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.2214689403772354, + "learning_rate": 2.1742084718808733e-05, + "loss": 0.2358, + "step": 5013, + "teacher_loss": 0.23736536502838135 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.2346813678741455, + "learning_rate": 2.174642185918751e-05, + "loss": 0.1941, + "step": 5014, + "teacher_loss": 0.18957194685935974 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.5537986755371094, + "learning_rate": 2.175075899956629e-05, + "loss": 0.2514, + "step": 5015, + "teacher_loss": 0.21782582998275757 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.23473691940307617, + "learning_rate": 2.1755096139945066e-05, + "loss": 0.1776, + "step": 5016, + "teacher_loss": 0.1712019145488739 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.8613081574440002, + "learning_rate": 2.175943328032384e-05, + "loss": 0.3249, + "step": 5017, + "teacher_loss": 0.2652929723262787 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.3878059685230255, + "learning_rate": 2.1763770420702618e-05, + "loss": 0.2736, + "step": 5018, + "teacher_loss": 0.26094740629196167 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.26398012042045593, + "learning_rate": 2.1768107561081392e-05, + "loss": 0.2055, + "step": 5019, + "teacher_loss": 0.19902634620666504 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.4983898997306824, + "learning_rate": 2.177244470146017e-05, + "loss": 0.2515, + "step": 5020, + "teacher_loss": 0.22410613298416138 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.36464983224868774, + "learning_rate": 2.1776781841838947e-05, + "loss": 0.2438, + "step": 5021, + "teacher_loss": 0.23041774332523346 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.3985564112663269, + "learning_rate": 2.1781118982217725e-05, + "loss": 0.2978, + "step": 5022, + "teacher_loss": 0.2865598201751709 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.33505991101264954, + "learning_rate": 2.1785456122596503e-05, + "loss": 0.2024, + "step": 5023, + "teacher_loss": 0.18762236833572388 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.9182227253913879, + "learning_rate": 2.178979326297528e-05, + "loss": 0.3878, + "step": 5024, + "teacher_loss": 0.3289141356945038 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.6506767272949219, + "learning_rate": 2.1794130403354058e-05, + "loss": 0.2961, + "step": 5025, + "teacher_loss": 0.25674110651016235 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.8632342219352722, + "learning_rate": 2.1798467543732835e-05, + "loss": 0.3071, + "step": 5026, + "teacher_loss": 0.24528968334197998 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.7953322529792786, + "learning_rate": 2.180280468411161e-05, + "loss": 0.3426, + "step": 5027, + "teacher_loss": 0.2922816276550293 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.9344212412834167, + "learning_rate": 2.1807141824490384e-05, + "loss": 0.5585, + "step": 5028, + "teacher_loss": 0.5166952610015869 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.5721264481544495, + "learning_rate": 2.181147896486916e-05, + "loss": 0.2318, + "step": 5029, + "teacher_loss": 0.1940159946680069 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.6489219665527344, + "learning_rate": 2.181581610524794e-05, + "loss": 0.2724, + "step": 5030, + "teacher_loss": 0.23057657480239868 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.26539158821105957, + "learning_rate": 2.1820153245626717e-05, + "loss": 0.2918, + "step": 5031, + "teacher_loss": 0.29478883743286133 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.557948648929596, + "learning_rate": 2.1824490386005494e-05, + "loss": 0.2394, + "step": 5032, + "teacher_loss": 0.20399603247642517 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.433555543422699, + "learning_rate": 2.1828827526384272e-05, + "loss": 0.2657, + "step": 5033, + "teacher_loss": 0.24700413644313812 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.42045560479164124, + "learning_rate": 2.183316466676305e-05, + "loss": 0.3181, + "step": 5034, + "teacher_loss": 0.30669713020324707 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.5812956094741821, + "learning_rate": 2.1837501807141827e-05, + "loss": 0.2785, + "step": 5035, + "teacher_loss": 0.24484045803546906 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.27369487285614014, + "learning_rate": 2.18418389475206e-05, + "loss": 0.2302, + "step": 5036, + "teacher_loss": 0.22542057931423187 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.43715769052505493, + "learning_rate": 2.184617608789938e-05, + "loss": 0.2805, + "step": 5037, + "teacher_loss": 0.26312124729156494 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.34317851066589355, + "learning_rate": 2.1850513228278157e-05, + "loss": 0.1996, + "step": 5038, + "teacher_loss": 0.1836351901292801 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.2169075906276703, + "learning_rate": 2.185485036865693e-05, + "loss": 0.2117, + "step": 5039, + "teacher_loss": 0.21115203201770782 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.2305580973625183, + "learning_rate": 2.185918750903571e-05, + "loss": 0.1531, + "step": 5040, + "teacher_loss": 0.144525408744812 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.36952972412109375, + "learning_rate": 2.1863524649414486e-05, + "loss": 0.2234, + "step": 5041, + "teacher_loss": 0.2071918547153473 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.42547714710235596, + "learning_rate": 2.1867861789793264e-05, + "loss": 0.2508, + "step": 5042, + "teacher_loss": 0.23133596777915955 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.7025610208511353, + "learning_rate": 2.187219893017204e-05, + "loss": 0.2835, + "step": 5043, + "teacher_loss": 0.23689395189285278 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.5379042029380798, + "learning_rate": 2.187653607055082e-05, + "loss": 0.4063, + "step": 5044, + "teacher_loss": 0.3916476368904114 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.5258693695068359, + "learning_rate": 2.1880873210929593e-05, + "loss": 0.2331, + "step": 5045, + "teacher_loss": 0.2005929797887802 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.3884434103965759, + "learning_rate": 2.188521035130837e-05, + "loss": 0.2488, + "step": 5046, + "teacher_loss": 0.23325151205062866 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.8033318519592285, + "learning_rate": 2.188954749168715e-05, + "loss": 0.268, + "step": 5047, + "teacher_loss": 0.20854748785495758 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.16746020317077637, + "learning_rate": 2.1893884632065926e-05, + "loss": 0.1625, + "step": 5048, + "teacher_loss": 0.16191929578781128 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.6545905470848083, + "learning_rate": 2.18982217724447e-05, + "loss": 0.4166, + "step": 5049, + "teacher_loss": 0.39011919498443604 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.5566633343696594, + "learning_rate": 2.1902558912823478e-05, + "loss": 0.3195, + "step": 5050, + "teacher_loss": 0.29318100214004517 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.41910529136657715, + "learning_rate": 2.1906896053202256e-05, + "loss": 0.2, + "step": 5051, + "teacher_loss": 0.17566964030265808 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.6011900901794434, + "learning_rate": 2.1911233193581033e-05, + "loss": 0.1756, + "step": 5052, + "teacher_loss": 0.12832866609096527 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.7882957458496094, + "learning_rate": 2.191557033395981e-05, + "loss": 0.3264, + "step": 5053, + "teacher_loss": 0.2751200795173645 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.6054220795631409, + "learning_rate": 2.1919907474338585e-05, + "loss": 0.3809, + "step": 5054, + "teacher_loss": 0.3559808135032654 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.22506718337535858, + "learning_rate": 2.1924244614717363e-05, + "loss": 0.2145, + "step": 5055, + "teacher_loss": 0.2133278250694275 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.4036915600299835, + "learning_rate": 2.192858175509614e-05, + "loss": 0.2935, + "step": 5056, + "teacher_loss": 0.2812880873680115 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.2568385899066925, + "learning_rate": 2.1932918895474918e-05, + "loss": 0.2323, + "step": 5057, + "teacher_loss": 0.2295297086238861 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.7053796052932739, + "learning_rate": 2.1937256035853696e-05, + "loss": 0.3442, + "step": 5058, + "teacher_loss": 0.3040328919887543 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.5729056000709534, + "learning_rate": 2.1941593176232473e-05, + "loss": 0.2077, + "step": 5059, + "teacher_loss": 0.16707561910152435 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.2908748388290405, + "learning_rate": 2.1945930316611247e-05, + "loss": 0.1946, + "step": 5060, + "teacher_loss": 0.18386542797088623 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.4173637628555298, + "learning_rate": 2.1950267456990025e-05, + "loss": 0.246, + "step": 5061, + "teacher_loss": 0.2269698977470398 + }, + { + "compression_loss": 0.0, + "epoch": 0.91, + "label_loss": 0.4610925614833832, + "learning_rate": 2.1954604597368803e-05, + "loss": 0.2356, + "step": 5062, + "teacher_loss": 0.21059909462928772 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.36663299798965454, + "learning_rate": 2.1958941737747577e-05, + "loss": 0.277, + "step": 5063, + "teacher_loss": 0.26699942350387573 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.7183891534805298, + "learning_rate": 2.1963278878126355e-05, + "loss": 0.3813, + "step": 5064, + "teacher_loss": 0.34384071826934814 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.9515157341957092, + "learning_rate": 2.1967616018505132e-05, + "loss": 0.3374, + "step": 5065, + "teacher_loss": 0.2691618800163269 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.4123256206512451, + "learning_rate": 2.197195315888391e-05, + "loss": 0.2926, + "step": 5066, + "teacher_loss": 0.2793012261390686 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.38430964946746826, + "learning_rate": 2.1976290299262687e-05, + "loss": 0.2285, + "step": 5067, + "teacher_loss": 0.21119289100170135 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.3126375079154968, + "learning_rate": 2.1980627439641465e-05, + "loss": 0.1925, + "step": 5068, + "teacher_loss": 0.17915701866149902 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.39222753047943115, + "learning_rate": 2.1984964580020243e-05, + "loss": 0.2419, + "step": 5069, + "teacher_loss": 0.22518888115882874 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.49275559186935425, + "learning_rate": 2.198930172039902e-05, + "loss": 0.278, + "step": 5070, + "teacher_loss": 0.2541758716106415 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.2644200921058655, + "learning_rate": 2.199363886077779e-05, + "loss": 0.1939, + "step": 5071, + "teacher_loss": 0.18608510494232178 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.4927518367767334, + "learning_rate": 2.199797600115657e-05, + "loss": 0.2373, + "step": 5072, + "teacher_loss": 0.20892333984375 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.3768884539604187, + "learning_rate": 2.2002313141535346e-05, + "loss": 0.2388, + "step": 5073, + "teacher_loss": 0.22350391745567322 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.430931031703949, + "learning_rate": 2.2006650281914124e-05, + "loss": 0.2266, + "step": 5074, + "teacher_loss": 0.20389777421951294 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.5921732783317566, + "learning_rate": 2.20109874222929e-05, + "loss": 0.2323, + "step": 5075, + "teacher_loss": 0.19230225682258606 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 1.0756014585494995, + "learning_rate": 2.201532456267168e-05, + "loss": 0.5575, + "step": 5076, + "teacher_loss": 0.49994099140167236 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.6048029661178589, + "learning_rate": 2.2019661703050457e-05, + "loss": 0.2466, + "step": 5077, + "teacher_loss": 0.20683380961418152 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.8456159830093384, + "learning_rate": 2.2023998843429234e-05, + "loss": 0.7344, + "step": 5078, + "teacher_loss": 0.7220935225486755 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.32778510451316833, + "learning_rate": 2.2028335983808012e-05, + "loss": 0.2019, + "step": 5079, + "teacher_loss": 0.18796800076961517 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.5015953779220581, + "learning_rate": 2.2032673124186786e-05, + "loss": 0.2505, + "step": 5080, + "teacher_loss": 0.22256334125995636 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.49273157119750977, + "learning_rate": 2.2037010264565564e-05, + "loss": 0.305, + "step": 5081, + "teacher_loss": 0.2841217517852783 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.40552520751953125, + "learning_rate": 2.2041347404944338e-05, + "loss": 0.3022, + "step": 5082, + "teacher_loss": 0.29073381423950195 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.6219925880432129, + "learning_rate": 2.2045684545323116e-05, + "loss": 0.3531, + "step": 5083, + "teacher_loss": 0.32317888736724854 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.44387075304985046, + "learning_rate": 2.2050021685701893e-05, + "loss": 0.3448, + "step": 5084, + "teacher_loss": 0.33377572894096375 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.34271883964538574, + "learning_rate": 2.205435882608067e-05, + "loss": 0.1735, + "step": 5085, + "teacher_loss": 0.15472745895385742 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.8497046232223511, + "learning_rate": 2.205869596645945e-05, + "loss": 0.2463, + "step": 5086, + "teacher_loss": 0.17930924892425537 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.22965535521507263, + "learning_rate": 2.2063033106838226e-05, + "loss": 0.2465, + "step": 5087, + "teacher_loss": 0.24835895001888275 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.6426375508308411, + "learning_rate": 2.2067370247217004e-05, + "loss": 0.3705, + "step": 5088, + "teacher_loss": 0.34030839800834656 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.46825292706489563, + "learning_rate": 2.2071707387595778e-05, + "loss": 0.2197, + "step": 5089, + "teacher_loss": 0.19205589592456818 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.4416676163673401, + "learning_rate": 2.2076044527974556e-05, + "loss": 0.2182, + "step": 5090, + "teacher_loss": 0.19341400265693665 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.3583717346191406, + "learning_rate": 2.2080381668353333e-05, + "loss": 0.2611, + "step": 5091, + "teacher_loss": 0.2503451108932495 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.4295124411582947, + "learning_rate": 2.208471880873211e-05, + "loss": 0.2113, + "step": 5092, + "teacher_loss": 0.18707753717899323 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.6649757623672485, + "learning_rate": 2.2089055949110885e-05, + "loss": 0.2709, + "step": 5093, + "teacher_loss": 0.2270745038986206 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.07297470420598984, + "learning_rate": 2.2093393089489663e-05, + "loss": 0.167, + "step": 5094, + "teacher_loss": 0.1774577796459198 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.3141477108001709, + "learning_rate": 2.209773022986844e-05, + "loss": 0.2197, + "step": 5095, + "teacher_loss": 0.20923012495040894 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.3098946511745453, + "learning_rate": 2.2102067370247218e-05, + "loss": 0.2063, + "step": 5096, + "teacher_loss": 0.19479797780513763 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.3089548647403717, + "learning_rate": 2.2106404510625996e-05, + "loss": 0.2663, + "step": 5097, + "teacher_loss": 0.2615513205528259 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.22667396068572998, + "learning_rate": 2.211074165100477e-05, + "loss": 0.2841, + "step": 5098, + "teacher_loss": 0.2905122637748718 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.36995601654052734, + "learning_rate": 2.2115078791383548e-05, + "loss": 0.3049, + "step": 5099, + "teacher_loss": 0.2977074086666107 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.5350288152694702, + "learning_rate": 2.2119415931762325e-05, + "loss": 0.248, + "step": 5100, + "teacher_loss": 0.21605652570724487 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.6179404854774475, + "learning_rate": 2.2123753072141103e-05, + "loss": 0.3451, + "step": 5101, + "teacher_loss": 0.3148387372493744 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.29611727595329285, + "learning_rate": 2.212809021251988e-05, + "loss": 0.2341, + "step": 5102, + "teacher_loss": 0.22723616659641266 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.2847411036491394, + "learning_rate": 2.2132427352898658e-05, + "loss": 0.2304, + "step": 5103, + "teacher_loss": 0.22436955571174622 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.15685270726680756, + "learning_rate": 2.2136764493277432e-05, + "loss": 0.1656, + "step": 5104, + "teacher_loss": 0.16652518510818481 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.23992204666137695, + "learning_rate": 2.214110163365621e-05, + "loss": 0.2268, + "step": 5105, + "teacher_loss": 0.22533570230007172 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.3588973581790924, + "learning_rate": 2.2145438774034988e-05, + "loss": 0.2305, + "step": 5106, + "teacher_loss": 0.21623027324676514 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.17951223254203796, + "learning_rate": 2.2149775914413762e-05, + "loss": 0.1833, + "step": 5107, + "teacher_loss": 0.18373596668243408 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.6676278710365295, + "learning_rate": 2.215411305479254e-05, + "loss": 0.2464, + "step": 5108, + "teacher_loss": 0.19961433112621307 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.3264515995979309, + "learning_rate": 2.2158450195171317e-05, + "loss": 0.2459, + "step": 5109, + "teacher_loss": 0.2369847148656845 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.9933271408081055, + "learning_rate": 2.2162787335550095e-05, + "loss": 0.2519, + "step": 5110, + "teacher_loss": 0.16952507197856903 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 1.8078665733337402, + "learning_rate": 2.2167124475928872e-05, + "loss": 0.4692, + "step": 5111, + "teacher_loss": 0.3204426169395447 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.40188518166542053, + "learning_rate": 2.217146161630765e-05, + "loss": 0.266, + "step": 5112, + "teacher_loss": 0.25089964270591736 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.44755205512046814, + "learning_rate": 2.2175798756686428e-05, + "loss": 0.2854, + "step": 5113, + "teacher_loss": 0.26737403869628906 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.5052809715270996, + "learning_rate": 2.2180135897065205e-05, + "loss": 0.2689, + "step": 5114, + "teacher_loss": 0.24268098175525665 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.9582458734512329, + "learning_rate": 2.218447303744398e-05, + "loss": 0.2949, + "step": 5115, + "teacher_loss": 0.22121265530586243 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.3993985652923584, + "learning_rate": 2.2188810177822754e-05, + "loss": 0.1597, + "step": 5116, + "teacher_loss": 0.13308513164520264 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.17697572708129883, + "learning_rate": 2.219314731820153e-05, + "loss": 0.2512, + "step": 5117, + "teacher_loss": 0.2594691812992096 + }, + { + "compression_loss": 0.0, + "epoch": 0.92, + "label_loss": 0.18234822154045105, + "learning_rate": 2.219748445858031e-05, + "loss": 0.2259, + "step": 5118, + "teacher_loss": 0.2307826280593872 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.3989626467227936, + "learning_rate": 2.2201821598959086e-05, + "loss": 0.2394, + "step": 5119, + "teacher_loss": 0.22162196040153503 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.5394414663314819, + "learning_rate": 2.2206158739337864e-05, + "loss": 0.3933, + "step": 5120, + "teacher_loss": 0.37701237201690674 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.2066372036933899, + "learning_rate": 2.2210495879716642e-05, + "loss": 0.3219, + "step": 5121, + "teacher_loss": 0.3346531093120575 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.42274904251098633, + "learning_rate": 2.221483302009542e-05, + "loss": 0.2814, + "step": 5122, + "teacher_loss": 0.26568275690078735 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.23372864723205566, + "learning_rate": 2.2219170160474197e-05, + "loss": 0.177, + "step": 5123, + "teacher_loss": 0.1706833392381668 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.45319265127182007, + "learning_rate": 2.222350730085297e-05, + "loss": 0.2611, + "step": 5124, + "teacher_loss": 0.23978909850120544 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.5599527359008789, + "learning_rate": 2.222784444123175e-05, + "loss": 0.2652, + "step": 5125, + "teacher_loss": 0.23246073722839355 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.4552772045135498, + "learning_rate": 2.2232181581610523e-05, + "loss": 0.2558, + "step": 5126, + "teacher_loss": 0.2335938811302185 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.3029862642288208, + "learning_rate": 2.22365187219893e-05, + "loss": 0.2063, + "step": 5127, + "teacher_loss": 0.19554370641708374 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.31261229515075684, + "learning_rate": 2.224085586236808e-05, + "loss": 0.2244, + "step": 5128, + "teacher_loss": 0.214548259973526 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.6989035606384277, + "learning_rate": 2.2245193002746856e-05, + "loss": 0.2245, + "step": 5129, + "teacher_loss": 0.17179185152053833 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.4375190734863281, + "learning_rate": 2.2249530143125634e-05, + "loss": 0.3059, + "step": 5130, + "teacher_loss": 0.29128098487854004 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.9181143045425415, + "learning_rate": 2.225386728350441e-05, + "loss": 0.2929, + "step": 5131, + "teacher_loss": 0.22344090044498444 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.444706529378891, + "learning_rate": 2.225820442388319e-05, + "loss": 0.3929, + "step": 5132, + "teacher_loss": 0.3871840238571167 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.19605103135108948, + "learning_rate": 2.2262541564261963e-05, + "loss": 0.2539, + "step": 5133, + "teacher_loss": 0.26034265756607056 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.8624093532562256, + "learning_rate": 2.226687870464074e-05, + "loss": 0.2562, + "step": 5134, + "teacher_loss": 0.18885639309883118 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.34122729301452637, + "learning_rate": 2.2271215845019518e-05, + "loss": 0.2629, + "step": 5135, + "teacher_loss": 0.25414156913757324 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.6111626625061035, + "learning_rate": 2.2275552985398296e-05, + "loss": 0.245, + "step": 5136, + "teacher_loss": 0.20434913039207458 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.2054591178894043, + "learning_rate": 2.227989012577707e-05, + "loss": 0.226, + "step": 5137, + "teacher_loss": 0.2283117175102234 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.20084145665168762, + "learning_rate": 2.2284227266155848e-05, + "loss": 0.1909, + "step": 5138, + "teacher_loss": 0.18979953229427338 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.33382779359817505, + "learning_rate": 2.2288564406534625e-05, + "loss": 0.2195, + "step": 5139, + "teacher_loss": 0.20679888129234314 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 1.0286345481872559, + "learning_rate": 2.2292901546913403e-05, + "loss": 0.3946, + "step": 5140, + "teacher_loss": 0.3241494297981262 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.5276502370834351, + "learning_rate": 2.229723868729218e-05, + "loss": 0.3214, + "step": 5141, + "teacher_loss": 0.29852110147476196 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.7595165967941284, + "learning_rate": 2.2301575827670955e-05, + "loss": 0.324, + "step": 5142, + "teacher_loss": 0.2755633592605591 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.2286558896303177, + "learning_rate": 2.2305912968049732e-05, + "loss": 0.2166, + "step": 5143, + "teacher_loss": 0.21522654592990875 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.6739788055419922, + "learning_rate": 2.231025010842851e-05, + "loss": 0.2739, + "step": 5144, + "teacher_loss": 0.2294815331697464 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.14675763249397278, + "learning_rate": 2.2314587248807288e-05, + "loss": 0.1761, + "step": 5145, + "teacher_loss": 0.17932449281215668 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.48987025022506714, + "learning_rate": 2.2318924389186065e-05, + "loss": 0.2334, + "step": 5146, + "teacher_loss": 0.20487241446971893 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.21976234018802643, + "learning_rate": 2.232326152956484e-05, + "loss": 0.2227, + "step": 5147, + "teacher_loss": 0.22299061715602875 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.7391775846481323, + "learning_rate": 2.2327598669943617e-05, + "loss": 0.3247, + "step": 5148, + "teacher_loss": 0.27859389781951904 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.3850157856941223, + "learning_rate": 2.2331935810322395e-05, + "loss": 0.2686, + "step": 5149, + "teacher_loss": 0.2556228041648865 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.3147790729999542, + "learning_rate": 2.2336272950701172e-05, + "loss": 0.1748, + "step": 5150, + "teacher_loss": 0.15925270318984985 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.5497558116912842, + "learning_rate": 2.2340610091079947e-05, + "loss": 0.3476, + "step": 5151, + "teacher_loss": 0.3251686096191406 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.4033011794090271, + "learning_rate": 2.2344947231458724e-05, + "loss": 0.2875, + "step": 5152, + "teacher_loss": 0.27466481924057007 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.19783766567707062, + "learning_rate": 2.2349284371837502e-05, + "loss": 0.2125, + "step": 5153, + "teacher_loss": 0.21415627002716064 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.23227161169052124, + "learning_rate": 2.235362151221628e-05, + "loss": 0.1803, + "step": 5154, + "teacher_loss": 0.17449629306793213 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.5798959732055664, + "learning_rate": 2.2357958652595057e-05, + "loss": 0.2762, + "step": 5155, + "teacher_loss": 0.24248716235160828 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 1.0399142503738403, + "learning_rate": 2.2362295792973835e-05, + "loss": 0.3185, + "step": 5156, + "teacher_loss": 0.23838664591312408 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.48925402760505676, + "learning_rate": 2.2366632933352612e-05, + "loss": 0.2493, + "step": 5157, + "teacher_loss": 0.22261153161525726 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.25585877895355225, + "learning_rate": 2.2370970073731387e-05, + "loss": 0.1867, + "step": 5158, + "teacher_loss": 0.1790388822555542 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.294344961643219, + "learning_rate": 2.2375307214110164e-05, + "loss": 0.211, + "step": 5159, + "teacher_loss": 0.2017013430595398 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.6578915119171143, + "learning_rate": 2.237964435448894e-05, + "loss": 0.2973, + "step": 5160, + "teacher_loss": 0.2572368383407593 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.4158608913421631, + "learning_rate": 2.2383981494867716e-05, + "loss": 0.253, + "step": 5161, + "teacher_loss": 0.2349167764186859 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.22157132625579834, + "learning_rate": 2.2388318635246494e-05, + "loss": 0.2211, + "step": 5162, + "teacher_loss": 0.2210463136434555 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.5379351377487183, + "learning_rate": 2.239265577562527e-05, + "loss": 0.3446, + "step": 5163, + "teacher_loss": 0.3231605291366577 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.37333449721336365, + "learning_rate": 2.239699291600405e-05, + "loss": 0.2115, + "step": 5164, + "teacher_loss": 0.1935131549835205 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.17080548405647278, + "learning_rate": 2.2401330056382827e-05, + "loss": 0.2254, + "step": 5165, + "teacher_loss": 0.2314268946647644 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.7143734693527222, + "learning_rate": 2.2405667196761604e-05, + "loss": 0.345, + "step": 5166, + "teacher_loss": 0.3040086627006531 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.7520779371261597, + "learning_rate": 2.2410004337140382e-05, + "loss": 0.3036, + "step": 5167, + "teacher_loss": 0.2537897229194641 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.684225857257843, + "learning_rate": 2.2414341477519156e-05, + "loss": 0.3007, + "step": 5168, + "teacher_loss": 0.25807303190231323 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.23379717767238617, + "learning_rate": 2.241867861789793e-05, + "loss": 0.1486, + "step": 5169, + "teacher_loss": 0.1391725242137909 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.31053274869918823, + "learning_rate": 2.2423015758276708e-05, + "loss": 0.1724, + "step": 5170, + "teacher_loss": 0.15702611207962036 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.22114913165569305, + "learning_rate": 2.2427352898655486e-05, + "loss": 0.2585, + "step": 5171, + "teacher_loss": 0.26262637972831726 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.4058115482330322, + "learning_rate": 2.2431690039034263e-05, + "loss": 0.2626, + "step": 5172, + "teacher_loss": 0.24673837423324585 + }, + { + "compression_loss": 0.0, + "epoch": 0.93, + "label_loss": 0.13354527950286865, + "learning_rate": 2.243602717941304e-05, + "loss": 0.1955, + "step": 5173, + "teacher_loss": 0.20240341126918793 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.2527647912502289, + "learning_rate": 2.244036431979182e-05, + "loss": 0.239, + "step": 5174, + "teacher_loss": 0.23742276430130005 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.3180329203605652, + "learning_rate": 2.2444701460170596e-05, + "loss": 0.2166, + "step": 5175, + "teacher_loss": 0.20528320968151093 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.7262953519821167, + "learning_rate": 2.2449038600549374e-05, + "loss": 0.5043, + "step": 5176, + "teacher_loss": 0.47957974672317505 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.2496146857738495, + "learning_rate": 2.2453375740928148e-05, + "loss": 0.1712, + "step": 5177, + "teacher_loss": 0.16245608031749725 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.4889895021915436, + "learning_rate": 2.2457712881306926e-05, + "loss": 0.2446, + "step": 5178, + "teacher_loss": 0.21742860972881317 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.5513525605201721, + "learning_rate": 2.2462050021685703e-05, + "loss": 0.2596, + "step": 5179, + "teacher_loss": 0.22720560431480408 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.3760373890399933, + "learning_rate": 2.2466387162064477e-05, + "loss": 0.29, + "step": 5180, + "teacher_loss": 0.28045833110809326 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.12090438604354858, + "learning_rate": 2.2470724302443255e-05, + "loss": 0.1557, + "step": 5181, + "teacher_loss": 0.15951929986476898 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.44478827714920044, + "learning_rate": 2.2475061442822033e-05, + "loss": 0.2508, + "step": 5182, + "teacher_loss": 0.2292410433292389 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.468021035194397, + "learning_rate": 2.247939858320081e-05, + "loss": 0.3061, + "step": 5183, + "teacher_loss": 0.2880815863609314 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.5448011159896851, + "learning_rate": 2.2483735723579588e-05, + "loss": 0.3294, + "step": 5184, + "teacher_loss": 0.3054155111312866 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.36160629987716675, + "learning_rate": 2.2488072863958365e-05, + "loss": 0.2462, + "step": 5185, + "teacher_loss": 0.23333030939102173 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.8166458606719971, + "learning_rate": 2.249241000433714e-05, + "loss": 0.3009, + "step": 5186, + "teacher_loss": 0.24362075328826904 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.34891456365585327, + "learning_rate": 2.2496747144715917e-05, + "loss": 0.2287, + "step": 5187, + "teacher_loss": 0.215294748544693 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.39646828174591064, + "learning_rate": 2.2501084285094695e-05, + "loss": 0.3423, + "step": 5188, + "teacher_loss": 0.3362903892993927 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.5243868827819824, + "learning_rate": 2.2505421425473473e-05, + "loss": 0.247, + "step": 5189, + "teacher_loss": 0.21617653965950012 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.378104031085968, + "learning_rate": 2.250975856585225e-05, + "loss": 0.215, + "step": 5190, + "teacher_loss": 0.19687201082706451 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.40269333124160767, + "learning_rate": 2.2514095706231024e-05, + "loss": 0.2041, + "step": 5191, + "teacher_loss": 0.1820824146270752 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.5432336330413818, + "learning_rate": 2.2518432846609802e-05, + "loss": 0.2286, + "step": 5192, + "teacher_loss": 0.1936730593442917 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.4971905052661896, + "learning_rate": 2.252276998698858e-05, + "loss": 0.2995, + "step": 5193, + "teacher_loss": 0.277488648891449 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.4415738880634308, + "learning_rate": 2.2527107127367357e-05, + "loss": 0.2774, + "step": 5194, + "teacher_loss": 0.25916820764541626 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.5430472493171692, + "learning_rate": 2.253144426774613e-05, + "loss": 0.2529, + "step": 5195, + "teacher_loss": 0.22064979374408722 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.5330978631973267, + "learning_rate": 2.253578140812491e-05, + "loss": 0.5355, + "step": 5196, + "teacher_loss": 0.5358136296272278 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.4828476011753082, + "learning_rate": 2.2540118548503687e-05, + "loss": 0.3746, + "step": 5197, + "teacher_loss": 0.36252695322036743 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.4799504280090332, + "learning_rate": 2.2544455688882464e-05, + "loss": 0.1987, + "step": 5198, + "teacher_loss": 0.16748881340026855 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.3681102991104126, + "learning_rate": 2.2548792829261242e-05, + "loss": 0.2907, + "step": 5199, + "teacher_loss": 0.2821410298347473 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.4468977451324463, + "learning_rate": 2.255312996964002e-05, + "loss": 0.1988, + "step": 5200, + "teacher_loss": 0.17121225595474243 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.6335517168045044, + "learning_rate": 2.2557467110018797e-05, + "loss": 0.3002, + "step": 5201, + "teacher_loss": 0.2632067799568176 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.4042230546474457, + "learning_rate": 2.256180425039757e-05, + "loss": 0.2872, + "step": 5202, + "teacher_loss": 0.27421101927757263 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.628432035446167, + "learning_rate": 2.256614139077635e-05, + "loss": 0.291, + "step": 5203, + "teacher_loss": 0.25353002548217773 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.38567131757736206, + "learning_rate": 2.2570478531155123e-05, + "loss": 0.1866, + "step": 5204, + "teacher_loss": 0.16453619301319122 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 1.2500131130218506, + "learning_rate": 2.25748156715339e-05, + "loss": 0.516, + "step": 5205, + "teacher_loss": 0.43444371223449707 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.3733985722064972, + "learning_rate": 2.257915281191268e-05, + "loss": 0.351, + "step": 5206, + "teacher_loss": 0.34854069352149963 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.47947967052459717, + "learning_rate": 2.2583489952291456e-05, + "loss": 0.2227, + "step": 5207, + "teacher_loss": 0.19412267208099365 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.5088632702827454, + "learning_rate": 2.2587827092670234e-05, + "loss": 0.2195, + "step": 5208, + "teacher_loss": 0.18729454278945923 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.46812838315963745, + "learning_rate": 2.259216423304901e-05, + "loss": 0.2869, + "step": 5209, + "teacher_loss": 0.2667706608772278 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.31662237644195557, + "learning_rate": 2.259650137342779e-05, + "loss": 0.2153, + "step": 5210, + "teacher_loss": 0.2040342092514038 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.3427325189113617, + "learning_rate": 2.2600838513806567e-05, + "loss": 0.4275, + "step": 5211, + "teacher_loss": 0.4368712902069092 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.2818562388420105, + "learning_rate": 2.260517565418534e-05, + "loss": 0.1828, + "step": 5212, + "teacher_loss": 0.17174619436264038 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.43525975942611694, + "learning_rate": 2.2609512794564115e-05, + "loss": 0.1729, + "step": 5213, + "teacher_loss": 0.1437588930130005 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.2975597381591797, + "learning_rate": 2.2613849934942893e-05, + "loss": 0.2301, + "step": 5214, + "teacher_loss": 0.222581684589386 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.25309211015701294, + "learning_rate": 2.261818707532167e-05, + "loss": 0.2081, + "step": 5215, + "teacher_loss": 0.20309637486934662 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.4849016070365906, + "learning_rate": 2.2622524215700448e-05, + "loss": 0.3044, + "step": 5216, + "teacher_loss": 0.2843630015850067 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.2920071482658386, + "learning_rate": 2.2626861356079226e-05, + "loss": 0.2055, + "step": 5217, + "teacher_loss": 0.1959172785282135 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.6029982566833496, + "learning_rate": 2.2631198496458003e-05, + "loss": 0.2997, + "step": 5218, + "teacher_loss": 0.26605552434921265 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.1946466863155365, + "learning_rate": 2.263553563683678e-05, + "loss": 0.1647, + "step": 5219, + "teacher_loss": 0.16140566766262054 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.6143782734870911, + "learning_rate": 2.263987277721556e-05, + "loss": 0.2686, + "step": 5220, + "teacher_loss": 0.23017823696136475 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.21290603280067444, + "learning_rate": 2.2644209917594333e-05, + "loss": 0.1957, + "step": 5221, + "teacher_loss": 0.1938113272190094 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.4121314287185669, + "learning_rate": 2.264854705797311e-05, + "loss": 0.2165, + "step": 5222, + "teacher_loss": 0.19480614364147186 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.47370651364326477, + "learning_rate": 2.2652884198351888e-05, + "loss": 0.2302, + "step": 5223, + "teacher_loss": 0.20313358306884766 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.2794271409511566, + "learning_rate": 2.2657221338730662e-05, + "loss": 0.2575, + "step": 5224, + "teacher_loss": 0.25501489639282227 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.347237765789032, + "learning_rate": 2.266155847910944e-05, + "loss": 0.2666, + "step": 5225, + "teacher_loss": 0.2576659917831421 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.8451735973358154, + "learning_rate": 2.2665895619488218e-05, + "loss": 0.2514, + "step": 5226, + "teacher_loss": 0.18546950817108154 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 1.0351994037628174, + "learning_rate": 2.2670232759866995e-05, + "loss": 0.4788, + "step": 5227, + "teacher_loss": 0.4170287847518921 + }, + { + "compression_loss": 0.0, + "epoch": 0.94, + "label_loss": 0.6906437873840332, + "learning_rate": 2.2674569900245773e-05, + "loss": 0.2939, + "step": 5228, + "teacher_loss": 0.24980652332305908 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.9929763078689575, + "learning_rate": 2.267890704062455e-05, + "loss": 0.2817, + "step": 5229, + "teacher_loss": 0.2026483565568924 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.7074835300445557, + "learning_rate": 2.2683244181003325e-05, + "loss": 0.2788, + "step": 5230, + "teacher_loss": 0.23115208745002747 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.3571232557296753, + "learning_rate": 2.2687581321382102e-05, + "loss": 0.2962, + "step": 5231, + "teacher_loss": 0.2894464433193207 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.42810314893722534, + "learning_rate": 2.269191846176088e-05, + "loss": 0.2194, + "step": 5232, + "teacher_loss": 0.1962508261203766 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.2567265033721924, + "learning_rate": 2.2696255602139657e-05, + "loss": 0.2423, + "step": 5233, + "teacher_loss": 0.2407262623310089 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.6598492860794067, + "learning_rate": 2.2700592742518435e-05, + "loss": 0.3227, + "step": 5234, + "teacher_loss": 0.2852838933467865 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.5134366750717163, + "learning_rate": 2.270492988289721e-05, + "loss": 0.3148, + "step": 5235, + "teacher_loss": 0.2927519381046295 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.2056937962770462, + "learning_rate": 2.2709267023275987e-05, + "loss": 0.2118, + "step": 5236, + "teacher_loss": 0.21250778436660767 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.5422415733337402, + "learning_rate": 2.2713604163654765e-05, + "loss": 0.2282, + "step": 5237, + "teacher_loss": 0.19330623745918274 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.5009069442749023, + "learning_rate": 2.2717941304033542e-05, + "loss": 0.252, + "step": 5238, + "teacher_loss": 0.2243586629629135 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.37588587403297424, + "learning_rate": 2.2722278444412316e-05, + "loss": 0.2275, + "step": 5239, + "teacher_loss": 0.2109965682029724 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.4359273314476013, + "learning_rate": 2.2726615584791094e-05, + "loss": 0.2677, + "step": 5240, + "teacher_loss": 0.2489645779132843 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.395673930644989, + "learning_rate": 2.273095272516987e-05, + "loss": 0.3863, + "step": 5241, + "teacher_loss": 0.3852764368057251 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.18563883006572723, + "learning_rate": 2.273528986554865e-05, + "loss": 0.2155, + "step": 5242, + "teacher_loss": 0.21880190074443817 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.23733291029930115, + "learning_rate": 2.2739627005927427e-05, + "loss": 0.2066, + "step": 5243, + "teacher_loss": 0.2031739205121994 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.3128124475479126, + "learning_rate": 2.2743964146306205e-05, + "loss": 0.1834, + "step": 5244, + "teacher_loss": 0.16898566484451294 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.6251288652420044, + "learning_rate": 2.274830128668498e-05, + "loss": 0.2973, + "step": 5245, + "teacher_loss": 0.2608697712421417 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.41884487867355347, + "learning_rate": 2.2752638427063756e-05, + "loss": 0.3151, + "step": 5246, + "teacher_loss": 0.3035174012184143 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.37374347448349, + "learning_rate": 2.2756975567442534e-05, + "loss": 0.2212, + "step": 5247, + "teacher_loss": 0.20426736772060394 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.3473066985607147, + "learning_rate": 2.2761312707821308e-05, + "loss": 0.2591, + "step": 5248, + "teacher_loss": 0.24926765263080597 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.49194997549057007, + "learning_rate": 2.2765649848200086e-05, + "loss": 0.2776, + "step": 5249, + "teacher_loss": 0.25381016731262207 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.24947193264961243, + "learning_rate": 2.2769986988578863e-05, + "loss": 0.2409, + "step": 5250, + "teacher_loss": 0.2399916797876358 + }, + { + "epoch": 0.95, + "eval_exact_match": 79.63103122043519, + "eval_f1": 87.18634119235962, + "step": 5250 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.2812878489494324, + "learning_rate": 2.277432412895764e-05, + "loss": 0.4144, + "step": 5251, + "teacher_loss": 0.42915308475494385 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.7196929454803467, + "learning_rate": 2.277866126933642e-05, + "loss": 0.3573, + "step": 5252, + "teacher_loss": 0.3170585334300995 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.39043283462524414, + "learning_rate": 2.2782998409715196e-05, + "loss": 0.2008, + "step": 5253, + "teacher_loss": 0.1797480583190918 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.2606884837150574, + "learning_rate": 2.2787335550093974e-05, + "loss": 0.3089, + "step": 5254, + "teacher_loss": 0.3142600357532501 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.16645267605781555, + "learning_rate": 2.279167269047275e-05, + "loss": 0.1668, + "step": 5255, + "teacher_loss": 0.16688109934329987 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.42375534772872925, + "learning_rate": 2.2796009830851526e-05, + "loss": 0.2011, + "step": 5256, + "teacher_loss": 0.1763540804386139 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.45146262645721436, + "learning_rate": 2.28003469712303e-05, + "loss": 0.2372, + "step": 5257, + "teacher_loss": 0.21333810687065125 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.8403382301330566, + "learning_rate": 2.2804684111609078e-05, + "loss": 0.4967, + "step": 5258, + "teacher_loss": 0.4585610628128052 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.33543968200683594, + "learning_rate": 2.2809021251987855e-05, + "loss": 0.1915, + "step": 5259, + "teacher_loss": 0.17547093331813812 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.28520306944847107, + "learning_rate": 2.2813358392366633e-05, + "loss": 0.2349, + "step": 5260, + "teacher_loss": 0.229332834482193 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.16511911153793335, + "learning_rate": 2.281769553274541e-05, + "loss": 0.1895, + "step": 5261, + "teacher_loss": 0.1921992152929306 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.623196542263031, + "learning_rate": 2.2822032673124188e-05, + "loss": 0.3784, + "step": 5262, + "teacher_loss": 0.3511947989463806 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.422479510307312, + "learning_rate": 2.2826369813502966e-05, + "loss": 0.2691, + "step": 5263, + "teacher_loss": 0.2520909905433655 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.5880293846130371, + "learning_rate": 2.2830706953881743e-05, + "loss": 0.4016, + "step": 5264, + "teacher_loss": 0.38090479373931885 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.5907383561134338, + "learning_rate": 2.2835044094260518e-05, + "loss": 0.2803, + "step": 5265, + "teacher_loss": 0.24582664668560028 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.388629674911499, + "learning_rate": 2.2839381234639295e-05, + "loss": 0.2379, + "step": 5266, + "teacher_loss": 0.2211308777332306 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.28395259380340576, + "learning_rate": 2.284371837501807e-05, + "loss": 0.2046, + "step": 5267, + "teacher_loss": 0.19577160477638245 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.30127406120300293, + "learning_rate": 2.2848055515396847e-05, + "loss": 0.2127, + "step": 5268, + "teacher_loss": 0.20289164781570435 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.4572233259677887, + "learning_rate": 2.2852392655775625e-05, + "loss": 0.2669, + "step": 5269, + "teacher_loss": 0.2457369863986969 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.6482110619544983, + "learning_rate": 2.2856729796154402e-05, + "loss": 0.3682, + "step": 5270, + "teacher_loss": 0.3370552659034729 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.4635393023490906, + "learning_rate": 2.286106693653318e-05, + "loss": 0.1988, + "step": 5271, + "teacher_loss": 0.16938892006874084 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.7569433450698853, + "learning_rate": 2.2865404076911958e-05, + "loss": 0.2915, + "step": 5272, + "teacher_loss": 0.2397874891757965 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.2106567919254303, + "learning_rate": 2.2869741217290735e-05, + "loss": 0.2392, + "step": 5273, + "teacher_loss": 0.2423810511827469 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.3675040006637573, + "learning_rate": 2.287407835766951e-05, + "loss": 0.2719, + "step": 5274, + "teacher_loss": 0.26126301288604736 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.372201532125473, + "learning_rate": 2.2878415498048287e-05, + "loss": 0.398, + "step": 5275, + "teacher_loss": 0.40084031224250793 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.391643226146698, + "learning_rate": 2.2882752638427065e-05, + "loss": 0.2846, + "step": 5276, + "teacher_loss": 0.27274852991104126 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.42954087257385254, + "learning_rate": 2.2887089778805842e-05, + "loss": 0.2577, + "step": 5277, + "teacher_loss": 0.23864296078681946 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 1.2149486541748047, + "learning_rate": 2.2891426919184617e-05, + "loss": 0.4275, + "step": 5278, + "teacher_loss": 0.3400581479072571 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.3806818723678589, + "learning_rate": 2.2895764059563394e-05, + "loss": 0.2197, + "step": 5279, + "teacher_loss": 0.20180785655975342 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.767670750617981, + "learning_rate": 2.2900101199942172e-05, + "loss": 0.4487, + "step": 5280, + "teacher_loss": 0.4132162630558014 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.711574912071228, + "learning_rate": 2.290443834032095e-05, + "loss": 0.3307, + "step": 5281, + "teacher_loss": 0.2883613705635071 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.5144262909889221, + "learning_rate": 2.2908775480699727e-05, + "loss": 0.2793, + "step": 5282, + "teacher_loss": 0.2531528174877167 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.31219446659088135, + "learning_rate": 2.29131126210785e-05, + "loss": 0.2237, + "step": 5283, + "teacher_loss": 0.2138577699661255 + }, + { + "compression_loss": 0.0, + "epoch": 0.95, + "label_loss": 0.4874677062034607, + "learning_rate": 2.291744976145728e-05, + "loss": 0.2473, + "step": 5284, + "teacher_loss": 0.2205744981765747 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.44611963629722595, + "learning_rate": 2.2921786901836057e-05, + "loss": 0.2109, + "step": 5285, + "teacher_loss": 0.18478459119796753 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.3108885884284973, + "learning_rate": 2.2926124042214834e-05, + "loss": 0.1765, + "step": 5286, + "teacher_loss": 0.16161490976810455 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.434268057346344, + "learning_rate": 2.2930461182593612e-05, + "loss": 0.259, + "step": 5287, + "teacher_loss": 0.23951254785060883 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.8003425598144531, + "learning_rate": 2.293479832297239e-05, + "loss": 0.345, + "step": 5288, + "teacher_loss": 0.29438310861587524 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.6043721437454224, + "learning_rate": 2.2939135463351164e-05, + "loss": 0.2608, + "step": 5289, + "teacher_loss": 0.2226196825504303 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 1.0015742778778076, + "learning_rate": 2.294347260372994e-05, + "loss": 0.2933, + "step": 5290, + "teacher_loss": 0.21458885073661804 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.5063802599906921, + "learning_rate": 2.294780974410872e-05, + "loss": 0.2818, + "step": 5291, + "teacher_loss": 0.2568388879299164 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.48456746339797974, + "learning_rate": 2.2952146884487493e-05, + "loss": 0.2843, + "step": 5292, + "teacher_loss": 0.2620980739593506 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.537642776966095, + "learning_rate": 2.295648402486627e-05, + "loss": 0.2705, + "step": 5293, + "teacher_loss": 0.24077317118644714 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.2621435821056366, + "learning_rate": 2.296082116524505e-05, + "loss": 0.2107, + "step": 5294, + "teacher_loss": 0.20498394966125488 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.8919625282287598, + "learning_rate": 2.2965158305623826e-05, + "loss": 0.3275, + "step": 5295, + "teacher_loss": 0.26477229595184326 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.4959392547607422, + "learning_rate": 2.2969495446002604e-05, + "loss": 0.2496, + "step": 5296, + "teacher_loss": 0.22228381037712097 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.24925217032432556, + "learning_rate": 2.297383258638138e-05, + "loss": 0.1626, + "step": 5297, + "teacher_loss": 0.1529797911643982 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.7525221109390259, + "learning_rate": 2.297816972676016e-05, + "loss": 0.3961, + "step": 5298, + "teacher_loss": 0.3565044403076172 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.14059105515480042, + "learning_rate": 2.2982506867138936e-05, + "loss": 0.2874, + "step": 5299, + "teacher_loss": 0.3036627173423767 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.408150315284729, + "learning_rate": 2.298684400751771e-05, + "loss": 0.3686, + "step": 5300, + "teacher_loss": 0.36421453952789307 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.4785027801990509, + "learning_rate": 2.2991181147896485e-05, + "loss": 0.3436, + "step": 5301, + "teacher_loss": 0.3285689651966095 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.19875618815422058, + "learning_rate": 2.2995518288275263e-05, + "loss": 0.1888, + "step": 5302, + "teacher_loss": 0.18765440583229065 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.8475450873374939, + "learning_rate": 2.299985542865404e-05, + "loss": 0.4318, + "step": 5303, + "teacher_loss": 0.38558512926101685 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.49890416860580444, + "learning_rate": 2.3004192569032818e-05, + "loss": 0.3101, + "step": 5304, + "teacher_loss": 0.28915178775787354 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.13040488958358765, + "learning_rate": 2.3008529709411595e-05, + "loss": 0.1486, + "step": 5305, + "teacher_loss": 0.15066519379615784 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.5142734050750732, + "learning_rate": 2.3012866849790373e-05, + "loss": 0.3032, + "step": 5306, + "teacher_loss": 0.27975788712501526 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.2963438630104065, + "learning_rate": 2.301720399016915e-05, + "loss": 0.2805, + "step": 5307, + "teacher_loss": 0.27878519892692566 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.2815457582473755, + "learning_rate": 2.302154113054793e-05, + "loss": 0.3214, + "step": 5308, + "teacher_loss": 0.3258435130119324 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.49866294860839844, + "learning_rate": 2.3025878270926703e-05, + "loss": 0.2862, + "step": 5309, + "teacher_loss": 0.2626439929008484 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.17292019724845886, + "learning_rate": 2.303021541130548e-05, + "loss": 0.2801, + "step": 5310, + "teacher_loss": 0.29195380210876465 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.2461528778076172, + "learning_rate": 2.3034552551684254e-05, + "loss": 0.1623, + "step": 5311, + "teacher_loss": 0.15302368998527527 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.41084691882133484, + "learning_rate": 2.3038889692063032e-05, + "loss": 0.2453, + "step": 5312, + "teacher_loss": 0.22686000168323517 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.29898664355278015, + "learning_rate": 2.304322683244181e-05, + "loss": 0.2638, + "step": 5313, + "teacher_loss": 0.2598741352558136 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.37229400873184204, + "learning_rate": 2.3047563972820587e-05, + "loss": 0.293, + "step": 5314, + "teacher_loss": 0.2841700613498688 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.4559156894683838, + "learning_rate": 2.3051901113199365e-05, + "loss": 0.3119, + "step": 5315, + "teacher_loss": 0.2959163784980774 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.2551038861274719, + "learning_rate": 2.3056238253578143e-05, + "loss": 0.3094, + "step": 5316, + "teacher_loss": 0.3154229521751404 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.2521950602531433, + "learning_rate": 2.306057539395692e-05, + "loss": 0.1843, + "step": 5317, + "teacher_loss": 0.17671126127243042 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.2045522928237915, + "learning_rate": 2.3064912534335694e-05, + "loss": 0.1451, + "step": 5318, + "teacher_loss": 0.138482466340065 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.30991801619529724, + "learning_rate": 2.3069249674714472e-05, + "loss": 0.1954, + "step": 5319, + "teacher_loss": 0.1826450526714325 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 1.071541666984558, + "learning_rate": 2.307358681509325e-05, + "loss": 0.3195, + "step": 5320, + "teacher_loss": 0.23588864505290985 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.29259878396987915, + "learning_rate": 2.3077923955472027e-05, + "loss": 0.1871, + "step": 5321, + "teacher_loss": 0.17534509301185608 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.48258674144744873, + "learning_rate": 2.30822610958508e-05, + "loss": 0.239, + "step": 5322, + "teacher_loss": 0.21196448802947998 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.5426325798034668, + "learning_rate": 2.308659823622958e-05, + "loss": 0.3001, + "step": 5323, + "teacher_loss": 0.2731361985206604 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.4768981337547302, + "learning_rate": 2.3090935376608357e-05, + "loss": 0.2253, + "step": 5324, + "teacher_loss": 0.19734270870685577 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.6536442041397095, + "learning_rate": 2.3095272516987134e-05, + "loss": 0.3475, + "step": 5325, + "teacher_loss": 0.3134285807609558 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.7678326368331909, + "learning_rate": 2.3099609657365912e-05, + "loss": 0.228, + "step": 5326, + "teacher_loss": 0.16804514825344086 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.21201226115226746, + "learning_rate": 2.3103946797744686e-05, + "loss": 0.1774, + "step": 5327, + "teacher_loss": 0.1735735535621643 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.5550143122673035, + "learning_rate": 2.3108283938123464e-05, + "loss": 0.2766, + "step": 5328, + "teacher_loss": 0.24567165970802307 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.33735334873199463, + "learning_rate": 2.311262107850224e-05, + "loss": 0.3393, + "step": 5329, + "teacher_loss": 0.339538037776947 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.37199753522872925, + "learning_rate": 2.311695821888102e-05, + "loss": 0.2311, + "step": 5330, + "teacher_loss": 0.21543939411640167 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.5593907833099365, + "learning_rate": 2.3121295359259797e-05, + "loss": 0.2656, + "step": 5331, + "teacher_loss": 0.23299317061901093 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.31990504264831543, + "learning_rate": 2.3125632499638574e-05, + "loss": 0.234, + "step": 5332, + "teacher_loss": 0.22443200647830963 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.3720256984233856, + "learning_rate": 2.312996964001735e-05, + "loss": 0.1982, + "step": 5333, + "teacher_loss": 0.17883452773094177 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.31332194805145264, + "learning_rate": 2.3134306780396126e-05, + "loss": 0.2697, + "step": 5334, + "teacher_loss": 0.2649003863334656 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.6613035202026367, + "learning_rate": 2.3138643920774904e-05, + "loss": 0.5597, + "step": 5335, + "teacher_loss": 0.548446536064148 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.23744845390319824, + "learning_rate": 2.3142981061153678e-05, + "loss": 0.2066, + "step": 5336, + "teacher_loss": 0.20318594574928284 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.4818035960197449, + "learning_rate": 2.3147318201532456e-05, + "loss": 0.2323, + "step": 5337, + "teacher_loss": 0.20456373691558838 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.42452001571655273, + "learning_rate": 2.3151655341911233e-05, + "loss": 0.2651, + "step": 5338, + "teacher_loss": 0.24737368524074554 + }, + { + "compression_loss": 0.0, + "epoch": 0.96, + "label_loss": 0.8728054761886597, + "learning_rate": 2.315599248229001e-05, + "loss": 0.4228, + "step": 5339, + "teacher_loss": 0.37277162075042725 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.8660918474197388, + "learning_rate": 2.316032962266879e-05, + "loss": 0.3203, + "step": 5340, + "teacher_loss": 0.25968992710113525 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.22465132176876068, + "learning_rate": 2.3164666763047566e-05, + "loss": 0.2431, + "step": 5341, + "teacher_loss": 0.24515533447265625 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 1.1361886262893677, + "learning_rate": 2.3169003903426344e-05, + "loss": 0.4531, + "step": 5342, + "teacher_loss": 0.37723132967948914 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.25529032945632935, + "learning_rate": 2.3173341043805118e-05, + "loss": 0.2163, + "step": 5343, + "teacher_loss": 0.21191942691802979 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.2200397253036499, + "learning_rate": 2.3177678184183896e-05, + "loss": 0.1977, + "step": 5344, + "teacher_loss": 0.19520539045333862 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.16571789979934692, + "learning_rate": 2.318201532456267e-05, + "loss": 0.2261, + "step": 5345, + "teacher_loss": 0.2327772080898285 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.2317473441362381, + "learning_rate": 2.3186352464941447e-05, + "loss": 0.2094, + "step": 5346, + "teacher_loss": 0.2068655639886856 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.16925732791423798, + "learning_rate": 2.3190689605320225e-05, + "loss": 0.2512, + "step": 5347, + "teacher_loss": 0.26027965545654297 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.49995240569114685, + "learning_rate": 2.3195026745699003e-05, + "loss": 0.2787, + "step": 5348, + "teacher_loss": 0.2541377544403076 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.6650125980377197, + "learning_rate": 2.319936388607778e-05, + "loss": 0.5539, + "step": 5349, + "teacher_loss": 0.5415416955947876 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.49391233921051025, + "learning_rate": 2.3203701026456558e-05, + "loss": 0.2414, + "step": 5350, + "teacher_loss": 0.21338112652301788 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.1688418686389923, + "learning_rate": 2.3208038166835336e-05, + "loss": 0.1694, + "step": 5351, + "teacher_loss": 0.1694801300764084 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.5063471794128418, + "learning_rate": 2.3212375307214113e-05, + "loss": 0.3839, + "step": 5352, + "teacher_loss": 0.37028831243515015 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.22846029698848724, + "learning_rate": 2.3216712447592887e-05, + "loss": 0.2592, + "step": 5353, + "teacher_loss": 0.26265761256217957 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.4165247678756714, + "learning_rate": 2.322104958797166e-05, + "loss": 0.1858, + "step": 5354, + "teacher_loss": 0.16014957427978516 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.2282799482345581, + "learning_rate": 2.322538672835044e-05, + "loss": 0.1884, + "step": 5355, + "teacher_loss": 0.18399415910243988 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.42540442943573, + "learning_rate": 2.3229723868729217e-05, + "loss": 0.232, + "step": 5356, + "teacher_loss": 0.21055945754051208 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.5764268040657043, + "learning_rate": 2.3234061009107995e-05, + "loss": 0.2716, + "step": 5357, + "teacher_loss": 0.23769541084766388 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.6497641801834106, + "learning_rate": 2.3238398149486772e-05, + "loss": 0.2971, + "step": 5358, + "teacher_loss": 0.25793084502220154 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.6197007894515991, + "learning_rate": 2.324273528986555e-05, + "loss": 0.2876, + "step": 5359, + "teacher_loss": 0.25074994564056396 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 1.1256108283996582, + "learning_rate": 2.3247072430244327e-05, + "loss": 0.3872, + "step": 5360, + "teacher_loss": 0.3051164746284485 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.5118353366851807, + "learning_rate": 2.3251409570623105e-05, + "loss": 0.3214, + "step": 5361, + "teacher_loss": 0.30025580525398254 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.48860234022140503, + "learning_rate": 2.325574671100188e-05, + "loss": 0.2487, + "step": 5362, + "teacher_loss": 0.22203359007835388 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.5370458364486694, + "learning_rate": 2.3260083851380657e-05, + "loss": 0.2413, + "step": 5363, + "teacher_loss": 0.20849084854125977 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.41701263189315796, + "learning_rate": 2.3264420991759434e-05, + "loss": 0.3354, + "step": 5364, + "teacher_loss": 0.32630735635757446 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.36297526955604553, + "learning_rate": 2.326875813213821e-05, + "loss": 0.2577, + "step": 5365, + "teacher_loss": 0.245981365442276 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.25561586022377014, + "learning_rate": 2.3273095272516986e-05, + "loss": 0.1849, + "step": 5366, + "teacher_loss": 0.17703570425510406 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.2053670585155487, + "learning_rate": 2.3277432412895764e-05, + "loss": 0.2262, + "step": 5367, + "teacher_loss": 0.22848904132843018 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 1.0243103504180908, + "learning_rate": 2.328176955327454e-05, + "loss": 0.4653, + "step": 5368, + "teacher_loss": 0.4031349718570709 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.4429263770580292, + "learning_rate": 2.328610669365332e-05, + "loss": 0.2364, + "step": 5369, + "teacher_loss": 0.21349795162677765 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.2591411769390106, + "learning_rate": 2.3290443834032097e-05, + "loss": 0.2628, + "step": 5370, + "teacher_loss": 0.26318442821502686 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.1070953831076622, + "learning_rate": 2.329478097441087e-05, + "loss": 0.1423, + "step": 5371, + "teacher_loss": 0.14622879028320312 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.3947625160217285, + "learning_rate": 2.329911811478965e-05, + "loss": 0.3047, + "step": 5372, + "teacher_loss": 0.29472678899765015 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.16454137861728668, + "learning_rate": 2.3303455255168426e-05, + "loss": 0.2103, + "step": 5373, + "teacher_loss": 0.21540237963199615 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.37359437346458435, + "learning_rate": 2.3307792395547204e-05, + "loss": 0.2136, + "step": 5374, + "teacher_loss": 0.1958184838294983 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.43434181809425354, + "learning_rate": 2.331212953592598e-05, + "loss": 0.4877, + "step": 5375, + "teacher_loss": 0.49364525079727173 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.2762134075164795, + "learning_rate": 2.3316466676304756e-05, + "loss": 0.1919, + "step": 5376, + "teacher_loss": 0.18257302045822144 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.19992363452911377, + "learning_rate": 2.3320803816683533e-05, + "loss": 0.1864, + "step": 5377, + "teacher_loss": 0.1848490685224533 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.33336132764816284, + "learning_rate": 2.332514095706231e-05, + "loss": 0.3295, + "step": 5378, + "teacher_loss": 0.32908475399017334 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.3430309295654297, + "learning_rate": 2.332947809744109e-05, + "loss": 0.3666, + "step": 5379, + "teacher_loss": 0.369229257106781 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.18287985026836395, + "learning_rate": 2.3333815237819863e-05, + "loss": 0.2763, + "step": 5380, + "teacher_loss": 0.28670966625213623 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.49821245670318604, + "learning_rate": 2.333815237819864e-05, + "loss": 0.2686, + "step": 5381, + "teacher_loss": 0.24310877919197083 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.3554704189300537, + "learning_rate": 2.3342489518577418e-05, + "loss": 0.3191, + "step": 5382, + "teacher_loss": 0.31502699851989746 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.7280396223068237, + "learning_rate": 2.3346826658956196e-05, + "loss": 0.2718, + "step": 5383, + "teacher_loss": 0.22114010155200958 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.6777359247207642, + "learning_rate": 2.3351163799334973e-05, + "loss": 0.4402, + "step": 5384, + "teacher_loss": 0.41383713483810425 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.38362541794776917, + "learning_rate": 2.335550093971375e-05, + "loss": 0.2359, + "step": 5385, + "teacher_loss": 0.2194925844669342 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.42113980650901794, + "learning_rate": 2.335983808009253e-05, + "loss": 0.2555, + "step": 5386, + "teacher_loss": 0.2371484637260437 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.5297960042953491, + "learning_rate": 2.3364175220471303e-05, + "loss": 0.3073, + "step": 5387, + "teacher_loss": 0.28261566162109375 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.876060962677002, + "learning_rate": 2.336851236085008e-05, + "loss": 0.3757, + "step": 5388, + "teacher_loss": 0.32011473178863525 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.5322084426879883, + "learning_rate": 2.3372849501228855e-05, + "loss": 0.2739, + "step": 5389, + "teacher_loss": 0.24519816040992737 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.3035171926021576, + "learning_rate": 2.3377186641607632e-05, + "loss": 0.2306, + "step": 5390, + "teacher_loss": 0.222476065158844 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.4409346580505371, + "learning_rate": 2.338152378198641e-05, + "loss": 0.2284, + "step": 5391, + "teacher_loss": 0.204797625541687 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.39762794971466064, + "learning_rate": 2.3385860922365188e-05, + "loss": 0.2453, + "step": 5392, + "teacher_loss": 0.22839441895484924 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.4769248366355896, + "learning_rate": 2.3390198062743965e-05, + "loss": 0.2405, + "step": 5393, + "teacher_loss": 0.21422025561332703 + }, + { + "compression_loss": 0.0, + "epoch": 0.97, + "label_loss": 0.3190556764602661, + "learning_rate": 2.3394535203122743e-05, + "loss": 0.4323, + "step": 5394, + "teacher_loss": 0.44491666555404663 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.4160124361515045, + "learning_rate": 2.339887234350152e-05, + "loss": 0.2271, + "step": 5395, + "teacher_loss": 0.20611083507537842 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.8731113076210022, + "learning_rate": 2.3403209483880298e-05, + "loss": 0.4674, + "step": 5396, + "teacher_loss": 0.42231184244155884 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 1.1101782321929932, + "learning_rate": 2.3407546624259076e-05, + "loss": 0.2795, + "step": 5397, + "teacher_loss": 0.1872110664844513 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.3266570568084717, + "learning_rate": 2.3411883764637847e-05, + "loss": 0.2312, + "step": 5398, + "teacher_loss": 0.22059719264507294 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.13921064138412476, + "learning_rate": 2.3416220905016624e-05, + "loss": 0.1729, + "step": 5399, + "teacher_loss": 0.1766607165336609 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.25814932584762573, + "learning_rate": 2.3420558045395402e-05, + "loss": 0.2054, + "step": 5400, + "teacher_loss": 0.19951960444450378 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.4061494469642639, + "learning_rate": 2.342489518577418e-05, + "loss": 0.2928, + "step": 5401, + "teacher_loss": 0.2801753580570221 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.2980736196041107, + "learning_rate": 2.3429232326152957e-05, + "loss": 0.2005, + "step": 5402, + "teacher_loss": 0.1896316260099411 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.5789540410041809, + "learning_rate": 2.3433569466531735e-05, + "loss": 0.2807, + "step": 5403, + "teacher_loss": 0.2475496232509613 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.2554803192615509, + "learning_rate": 2.3437906606910512e-05, + "loss": 0.1833, + "step": 5404, + "teacher_loss": 0.1752779185771942 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.24882067739963531, + "learning_rate": 2.344224374728929e-05, + "loss": 0.2396, + "step": 5405, + "teacher_loss": 0.23862558603286743 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.4301232695579529, + "learning_rate": 2.3446580887668064e-05, + "loss": 0.1983, + "step": 5406, + "teacher_loss": 0.17251065373420715 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.36738380789756775, + "learning_rate": 2.3450918028046842e-05, + "loss": 0.2176, + "step": 5407, + "teacher_loss": 0.20091985166072845 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.24191394448280334, + "learning_rate": 2.345525516842562e-05, + "loss": 0.2015, + "step": 5408, + "teacher_loss": 0.19699940085411072 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.5004382133483887, + "learning_rate": 2.3459592308804394e-05, + "loss": 0.2753, + "step": 5409, + "teacher_loss": 0.2502540647983551 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.47690683603286743, + "learning_rate": 2.346392944918317e-05, + "loss": 0.318, + "step": 5410, + "teacher_loss": 0.30029550194740295 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.6977181434631348, + "learning_rate": 2.346826658956195e-05, + "loss": 0.2575, + "step": 5411, + "teacher_loss": 0.20860819518566132 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.43647173047065735, + "learning_rate": 2.3472603729940726e-05, + "loss": 0.2786, + "step": 5412, + "teacher_loss": 0.2610396444797516 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.2459789663553238, + "learning_rate": 2.3476940870319504e-05, + "loss": 0.2944, + "step": 5413, + "teacher_loss": 0.29983416199684143 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.2712249457836151, + "learning_rate": 2.3481278010698282e-05, + "loss": 0.1827, + "step": 5414, + "teacher_loss": 0.17285102605819702 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.5777601003646851, + "learning_rate": 2.3485615151077056e-05, + "loss": 0.3939, + "step": 5415, + "teacher_loss": 0.37349873781204224 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.302334725856781, + "learning_rate": 2.3489952291455834e-05, + "loss": 0.1952, + "step": 5416, + "teacher_loss": 0.18331976234912872 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.4470178186893463, + "learning_rate": 2.349428943183461e-05, + "loss": 0.3042, + "step": 5417, + "teacher_loss": 0.28837352991104126 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.4125959575176239, + "learning_rate": 2.349862657221339e-05, + "loss": 0.3353, + "step": 5418, + "teacher_loss": 0.3267241418361664 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.30898982286453247, + "learning_rate": 2.3502963712592166e-05, + "loss": 0.2344, + "step": 5419, + "teacher_loss": 0.22606824338436127 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.3123939633369446, + "learning_rate": 2.350730085297094e-05, + "loss": 0.2267, + "step": 5420, + "teacher_loss": 0.21720454096794128 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.2864669859409332, + "learning_rate": 2.3511637993349718e-05, + "loss": 0.206, + "step": 5421, + "teacher_loss": 0.19707578420639038 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.153743714094162, + "learning_rate": 2.3515975133728496e-05, + "loss": 0.2056, + "step": 5422, + "teacher_loss": 0.21133500337600708 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.34247249364852905, + "learning_rate": 2.3520312274107274e-05, + "loss": 0.2294, + "step": 5423, + "teacher_loss": 0.2168051153421402 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.8783653974533081, + "learning_rate": 2.3524649414486048e-05, + "loss": 0.3171, + "step": 5424, + "teacher_loss": 0.2546904683113098 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.3593134880065918, + "learning_rate": 2.3528986554864825e-05, + "loss": 0.3258, + "step": 5425, + "teacher_loss": 0.3220583200454712 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.4042368531227112, + "learning_rate": 2.3533323695243603e-05, + "loss": 0.2929, + "step": 5426, + "teacher_loss": 0.2805687189102173 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.381625771522522, + "learning_rate": 2.353766083562238e-05, + "loss": 0.2129, + "step": 5427, + "teacher_loss": 0.19409725069999695 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.7073277235031128, + "learning_rate": 2.3541997976001158e-05, + "loss": 0.2699, + "step": 5428, + "teacher_loss": 0.2212734818458557 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.2719656229019165, + "learning_rate": 2.3546335116379936e-05, + "loss": 0.1623, + "step": 5429, + "teacher_loss": 0.15014883875846863 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.6445224285125732, + "learning_rate": 2.3550672256758713e-05, + "loss": 0.3669, + "step": 5430, + "teacher_loss": 0.3360990881919861 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.4739958941936493, + "learning_rate": 2.3555009397137488e-05, + "loss": 0.2476, + "step": 5431, + "teacher_loss": 0.22244848310947418 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 1.4296815395355225, + "learning_rate": 2.3559346537516265e-05, + "loss": 0.4581, + "step": 5432, + "teacher_loss": 0.3500993549823761 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.42647045850753784, + "learning_rate": 2.356368367789504e-05, + "loss": 0.2438, + "step": 5433, + "teacher_loss": 0.22350388765335083 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.44229501485824585, + "learning_rate": 2.3568020818273817e-05, + "loss": 0.1999, + "step": 5434, + "teacher_loss": 0.17298519611358643 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 1.0484583377838135, + "learning_rate": 2.3572357958652595e-05, + "loss": 0.2517, + "step": 5435, + "teacher_loss": 0.16312196850776672 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.9945250749588013, + "learning_rate": 2.3576695099031372e-05, + "loss": 0.3356, + "step": 5436, + "teacher_loss": 0.26233339309692383 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.5502623915672302, + "learning_rate": 2.358103223941015e-05, + "loss": 0.2395, + "step": 5437, + "teacher_loss": 0.20502164959907532 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.6483927369117737, + "learning_rate": 2.3585369379788928e-05, + "loss": 0.2247, + "step": 5438, + "teacher_loss": 0.1776670217514038 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.4235118627548218, + "learning_rate": 2.3589706520167705e-05, + "loss": 0.311, + "step": 5439, + "teacher_loss": 0.2985331416130066 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.6320832371711731, + "learning_rate": 2.3594043660546483e-05, + "loss": 0.3292, + "step": 5440, + "teacher_loss": 0.2955858111381531 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.25183913111686707, + "learning_rate": 2.3598380800925257e-05, + "loss": 0.2193, + "step": 5441, + "teacher_loss": 0.2156939059495926 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.3258577883243561, + "learning_rate": 2.360271794130403e-05, + "loss": 0.2287, + "step": 5442, + "teacher_loss": 0.21786652505397797 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.32024165987968445, + "learning_rate": 2.360705508168281e-05, + "loss": 0.232, + "step": 5443, + "teacher_loss": 0.2222200334072113 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.27019447088241577, + "learning_rate": 2.3611392222061587e-05, + "loss": 0.2203, + "step": 5444, + "teacher_loss": 0.2147570550441742 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.6908140778541565, + "learning_rate": 2.3615729362440364e-05, + "loss": 0.3519, + "step": 5445, + "teacher_loss": 0.31429553031921387 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.24531099200248718, + "learning_rate": 2.3620066502819142e-05, + "loss": 0.1979, + "step": 5446, + "teacher_loss": 0.19257938861846924 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.27792447805404663, + "learning_rate": 2.362440364319792e-05, + "loss": 0.2639, + "step": 5447, + "teacher_loss": 0.26233696937561035 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.4228118658065796, + "learning_rate": 2.3628740783576697e-05, + "loss": 0.2643, + "step": 5448, + "teacher_loss": 0.24667519330978394 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.6106827259063721, + "learning_rate": 2.3633077923955475e-05, + "loss": 0.2619, + "step": 5449, + "teacher_loss": 0.22319769859313965 + }, + { + "compression_loss": 0.0, + "epoch": 0.98, + "label_loss": 0.2263053059577942, + "learning_rate": 2.363741506433425e-05, + "loss": 0.2186, + "step": 5450, + "teacher_loss": 0.2177969217300415 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.2460690140724182, + "learning_rate": 2.3641752204713027e-05, + "loss": 0.3006, + "step": 5451, + "teacher_loss": 0.3066667914390564 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.5033208131790161, + "learning_rate": 2.36460893450918e-05, + "loss": 0.4311, + "step": 5452, + "teacher_loss": 0.423112154006958 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.4537234306335449, + "learning_rate": 2.365042648547058e-05, + "loss": 0.2061, + "step": 5453, + "teacher_loss": 0.1785932183265686 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.7310105562210083, + "learning_rate": 2.3654763625849356e-05, + "loss": 0.2758, + "step": 5454, + "teacher_loss": 0.2252088189125061 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.8010063171386719, + "learning_rate": 2.3659100766228134e-05, + "loss": 0.3028, + "step": 5455, + "teacher_loss": 0.24742259085178375 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.4143591821193695, + "learning_rate": 2.366343790660691e-05, + "loss": 0.24, + "step": 5456, + "teacher_loss": 0.2206798493862152 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.5115957260131836, + "learning_rate": 2.366777504698569e-05, + "loss": 0.2924, + "step": 5457, + "teacher_loss": 0.2680544853210449 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.3721899092197418, + "learning_rate": 2.3672112187364467e-05, + "loss": 0.3075, + "step": 5458, + "teacher_loss": 0.3002851605415344 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.5223338603973389, + "learning_rate": 2.367644932774324e-05, + "loss": 0.2612, + "step": 5459, + "teacher_loss": 0.2322404533624649 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.3888382911682129, + "learning_rate": 2.368078646812202e-05, + "loss": 0.2387, + "step": 5460, + "teacher_loss": 0.22202935814857483 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.5063039064407349, + "learning_rate": 2.3685123608500796e-05, + "loss": 0.3863, + "step": 5461, + "teacher_loss": 0.37298738956451416 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.2738635540008545, + "learning_rate": 2.3689460748879574e-05, + "loss": 0.1768, + "step": 5462, + "teacher_loss": 0.16601964831352234 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.39991146326065063, + "learning_rate": 2.3693797889258348e-05, + "loss": 0.2241, + "step": 5463, + "teacher_loss": 0.20458745956420898 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.2586379051208496, + "learning_rate": 2.3698135029637126e-05, + "loss": 0.2569, + "step": 5464, + "teacher_loss": 0.25672462582588196 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.7350320219993591, + "learning_rate": 2.3702472170015903e-05, + "loss": 0.2837, + "step": 5465, + "teacher_loss": 0.23354575037956238 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.4511708617210388, + "learning_rate": 2.370680931039468e-05, + "loss": 0.3346, + "step": 5466, + "teacher_loss": 0.321644127368927 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.36172378063201904, + "learning_rate": 2.371114645077346e-05, + "loss": 0.1721, + "step": 5467, + "teacher_loss": 0.15097564458847046 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.503262996673584, + "learning_rate": 2.3715483591152233e-05, + "loss": 0.267, + "step": 5468, + "teacher_loss": 0.24076221883296967 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.6842743754386902, + "learning_rate": 2.371982073153101e-05, + "loss": 0.4294, + "step": 5469, + "teacher_loss": 0.40112870931625366 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.529126763343811, + "learning_rate": 2.3724157871909788e-05, + "loss": 0.3839, + "step": 5470, + "teacher_loss": 0.36778998374938965 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.42309868335723877, + "learning_rate": 2.3728495012288566e-05, + "loss": 0.2199, + "step": 5471, + "teacher_loss": 0.19730165600776672 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.4516448378562927, + "learning_rate": 2.3732832152667343e-05, + "loss": 0.202, + "step": 5472, + "teacher_loss": 0.17424491047859192 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.30680304765701294, + "learning_rate": 2.373716929304612e-05, + "loss": 0.2009, + "step": 5473, + "teacher_loss": 0.18908946216106415 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.4683828353881836, + "learning_rate": 2.3741506433424895e-05, + "loss": 0.287, + "step": 5474, + "teacher_loss": 0.26680734753608704 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.3786434531211853, + "learning_rate": 2.3745843573803673e-05, + "loss": 0.2068, + "step": 5475, + "teacher_loss": 0.18773552775382996 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.8037061095237732, + "learning_rate": 2.375018071418245e-05, + "loss": 0.3263, + "step": 5476, + "teacher_loss": 0.27327990531921387 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.27950191497802734, + "learning_rate": 2.3754517854561224e-05, + "loss": 0.234, + "step": 5477, + "teacher_loss": 0.22889825701713562 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.17843934893608093, + "learning_rate": 2.3758854994940002e-05, + "loss": 0.1931, + "step": 5478, + "teacher_loss": 0.1947167068719864 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.19085225462913513, + "learning_rate": 2.376319213531878e-05, + "loss": 0.2144, + "step": 5479, + "teacher_loss": 0.21700209379196167 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.1601770669221878, + "learning_rate": 2.3767529275697557e-05, + "loss": 0.2008, + "step": 5480, + "teacher_loss": 0.20528821647167206 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.3767884373664856, + "learning_rate": 2.3771866416076335e-05, + "loss": 0.2949, + "step": 5481, + "teacher_loss": 0.2858337163925171 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.42538249492645264, + "learning_rate": 2.3776203556455113e-05, + "loss": 0.2431, + "step": 5482, + "teacher_loss": 0.22281736135482788 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.2810768485069275, + "learning_rate": 2.378054069683389e-05, + "loss": 0.2638, + "step": 5483, + "teacher_loss": 0.2618332505226135 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.42660534381866455, + "learning_rate": 2.3784877837212668e-05, + "loss": 0.3869, + "step": 5484, + "teacher_loss": 0.3824366629123688 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.3473745584487915, + "learning_rate": 2.3789214977591442e-05, + "loss": 0.3016, + "step": 5485, + "teacher_loss": 0.2965286374092102 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.5838441848754883, + "learning_rate": 2.3793552117970216e-05, + "loss": 0.4045, + "step": 5486, + "teacher_loss": 0.3846234083175659 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.4836907386779785, + "learning_rate": 2.3797889258348994e-05, + "loss": 0.2862, + "step": 5487, + "teacher_loss": 0.2642497420310974 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.5806557536125183, + "learning_rate": 2.380222639872777e-05, + "loss": 0.3269, + "step": 5488, + "teacher_loss": 0.2986660301685333 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.5350724458694458, + "learning_rate": 2.380656353910655e-05, + "loss": 0.3407, + "step": 5489, + "teacher_loss": 0.31911808252334595 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.9408642649650574, + "learning_rate": 2.3810900679485327e-05, + "loss": 0.2952, + "step": 5490, + "teacher_loss": 0.2234172821044922 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.44192638993263245, + "learning_rate": 2.3815237819864104e-05, + "loss": 0.2393, + "step": 5491, + "teacher_loss": 0.21678532660007477 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.3596239387989044, + "learning_rate": 2.3819574960242882e-05, + "loss": 0.2137, + "step": 5492, + "teacher_loss": 0.19747722148895264 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.3033997416496277, + "learning_rate": 2.382391210062166e-05, + "loss": 0.318, + "step": 5493, + "teacher_loss": 0.3195984959602356 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.4174913167953491, + "learning_rate": 2.3828249241000434e-05, + "loss": 0.2917, + "step": 5494, + "teacher_loss": 0.2777055501937866 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.22468841075897217, + "learning_rate": 2.383258638137921e-05, + "loss": 0.2012, + "step": 5495, + "teacher_loss": 0.19859914481639862 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 1.2779312133789062, + "learning_rate": 2.3836923521757986e-05, + "loss": 1.0136, + "step": 5496, + "teacher_loss": 0.9842210412025452 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.2496974766254425, + "learning_rate": 2.3841260662136763e-05, + "loss": 0.304, + "step": 5497, + "teacher_loss": 0.30998706817626953 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.2141002118587494, + "learning_rate": 2.384559780251554e-05, + "loss": 0.2359, + "step": 5498, + "teacher_loss": 0.2382681965827942 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.5450341701507568, + "learning_rate": 2.384993494289432e-05, + "loss": 0.3198, + "step": 5499, + "teacher_loss": 0.29481691122055054 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.5489734411239624, + "learning_rate": 2.3854272083273096e-05, + "loss": 0.3784, + "step": 5500, + "teacher_loss": 0.3594951033592224 + }, + { + "epoch": 0.99, + "eval_exact_match": 79.73509933774834, + "eval_f1": 87.35944602695977, + "step": 5500 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.5233284831047058, + "learning_rate": 2.3858609223651874e-05, + "loss": 0.4033, + "step": 5501, + "teacher_loss": 0.3899722099304199 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.5641283988952637, + "learning_rate": 2.386294636403065e-05, + "loss": 0.2863, + "step": 5502, + "teacher_loss": 0.2553831934928894 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.7542777061462402, + "learning_rate": 2.3867283504409426e-05, + "loss": 0.3494, + "step": 5503, + "teacher_loss": 0.30440258979797363 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.5384633541107178, + "learning_rate": 2.3871620644788203e-05, + "loss": 0.2834, + "step": 5504, + "teacher_loss": 0.2550843060016632 + }, + { + "compression_loss": 0.0, + "epoch": 0.99, + "label_loss": 0.7068926692008972, + "learning_rate": 2.387595778516698e-05, + "loss": 0.2929, + "step": 5505, + "teacher_loss": 0.2468506395816803 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.5082980394363403, + "learning_rate": 2.388029492554576e-05, + "loss": 0.2234, + "step": 5506, + "teacher_loss": 0.1917380839586258 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.12020628154277802, + "learning_rate": 2.3884632065924533e-05, + "loss": 0.1763, + "step": 5507, + "teacher_loss": 0.18256735801696777 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.4165462851524353, + "learning_rate": 2.388896920630331e-05, + "loss": 0.2266, + "step": 5508, + "teacher_loss": 0.20546376705169678 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.42891794443130493, + "learning_rate": 2.3893306346682088e-05, + "loss": 0.2807, + "step": 5509, + "teacher_loss": 0.264274001121521 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.9357650876045227, + "learning_rate": 2.3897643487060866e-05, + "loss": 0.4576, + "step": 5510, + "teacher_loss": 0.4044947326183319 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.3889278471469879, + "learning_rate": 2.3901980627439643e-05, + "loss": 0.2118, + "step": 5511, + "teacher_loss": 0.19211365282535553 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.22113007307052612, + "learning_rate": 2.3906317767818418e-05, + "loss": 0.2511, + "step": 5512, + "teacher_loss": 0.2543991804122925 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.13394254446029663, + "learning_rate": 2.3910654908197195e-05, + "loss": 0.1726, + "step": 5513, + "teacher_loss": 0.1768435835838318 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.32154330611228943, + "learning_rate": 2.3914992048575973e-05, + "loss": 0.1829, + "step": 5514, + "teacher_loss": 0.1675400584936142 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.25767266750335693, + "learning_rate": 2.391932918895475e-05, + "loss": 0.1829, + "step": 5515, + "teacher_loss": 0.17457157373428345 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.39777910709381104, + "learning_rate": 2.3923666329333528e-05, + "loss": 0.1922, + "step": 5516, + "teacher_loss": 0.16930381953716278 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.483955055475235, + "learning_rate": 2.3928003469712306e-05, + "loss": 0.3311, + "step": 5517, + "teacher_loss": 0.31411248445510864 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.5580171346664429, + "learning_rate": 2.393234061009108e-05, + "loss": 0.2391, + "step": 5518, + "teacher_loss": 0.20361566543579102 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.2070433348417282, + "learning_rate": 2.3936677750469857e-05, + "loss": 0.1595, + "step": 5519, + "teacher_loss": 0.15423351526260376 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.3792913556098938, + "learning_rate": 2.3941014890848635e-05, + "loss": 0.3003, + "step": 5520, + "teacher_loss": 0.29149261116981506 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.13489359617233276, + "learning_rate": 2.394535203122741e-05, + "loss": 0.1739, + "step": 5521, + "teacher_loss": 0.17824864387512207 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.5052412152290344, + "learning_rate": 2.3949689171606187e-05, + "loss": 0.2683, + "step": 5522, + "teacher_loss": 0.24200588464736938 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.2507641613483429, + "learning_rate": 2.3954026311984965e-05, + "loss": 0.4393, + "step": 5523, + "teacher_loss": 0.4602043032646179 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.6958397626876831, + "learning_rate": 2.3958363452363742e-05, + "loss": 0.3561, + "step": 5524, + "teacher_loss": 0.31834876537323 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.2722817361354828, + "learning_rate": 2.396270059274252e-05, + "loss": 0.2359, + "step": 5525, + "teacher_loss": 0.23183688521385193 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.21399657428264618, + "learning_rate": 2.3967037733121297e-05, + "loss": 0.1998, + "step": 5526, + "teacher_loss": 0.19817332923412323 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.2081492841243744, + "learning_rate": 2.3971374873500075e-05, + "loss": 0.2297, + "step": 5527, + "teacher_loss": 0.23206466436386108 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.2719005048274994, + "learning_rate": 2.3975712013878853e-05, + "loss": 0.2534, + "step": 5528, + "teacher_loss": 0.251294881105423 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.348596453666687, + "learning_rate": 2.3980049154257627e-05, + "loss": 0.2809, + "step": 5529, + "teacher_loss": 0.2733915448188782 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.3902977406978607, + "learning_rate": 2.39843862946364e-05, + "loss": 0.2965, + "step": 5530, + "teacher_loss": 0.2860453724861145 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.5832538604736328, + "learning_rate": 2.398872343501518e-05, + "loss": 0.3181, + "step": 5531, + "teacher_loss": 0.2886642813682556 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.4647487998008728, + "learning_rate": 2.3993060575393956e-05, + "loss": 0.344, + "step": 5532, + "teacher_loss": 0.330563485622406 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.2958689332008362, + "learning_rate": 2.3997397715772734e-05, + "loss": 0.2127, + "step": 5533, + "teacher_loss": 0.20342203974723816 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.3096369802951813, + "learning_rate": 2.400173485615151e-05, + "loss": 0.3125, + "step": 5534, + "teacher_loss": 0.3128219544887543 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.3914162218570709, + "learning_rate": 2.400607199653029e-05, + "loss": 0.3004, + "step": 5535, + "teacher_loss": 0.2903319001197815 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.1795274317264557, + "learning_rate": 2.4010409136909067e-05, + "loss": 0.189, + "step": 5536, + "teacher_loss": 0.19007575511932373 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.3075406849384308, + "learning_rate": 2.4014746277287845e-05, + "loss": 0.2474, + "step": 5537, + "teacher_loss": 0.24071195721626282 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.2852080464363098, + "learning_rate": 2.4019083417666622e-05, + "loss": 0.2208, + "step": 5538, + "teacher_loss": 0.21366475522518158 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.5133790969848633, + "learning_rate": 2.4023420558045396e-05, + "loss": 0.279, + "step": 5539, + "teacher_loss": 0.252974271774292 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.6297202110290527, + "learning_rate": 2.402775769842417e-05, + "loss": 0.2348, + "step": 5540, + "teacher_loss": 0.19095048308372498 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.4543585777282715, + "learning_rate": 2.4032094838802948e-05, + "loss": 0.1974, + "step": 5541, + "teacher_loss": 0.16888773441314697 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.3950199484825134, + "learning_rate": 2.4036431979181726e-05, + "loss": 0.2009, + "step": 5542, + "teacher_loss": 0.17931383848190308 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.4443906247615814, + "learning_rate": 2.4040769119560503e-05, + "loss": 0.2901, + "step": 5543, + "teacher_loss": 0.27291643619537354 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.1903933733701706, + "learning_rate": 2.404510625993928e-05, + "loss": 0.15, + "step": 5544, + "teacher_loss": 0.14556069672107697 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.45108562707901, + "learning_rate": 2.404944340031806e-05, + "loss": 0.2391, + "step": 5545, + "teacher_loss": 0.2155945897102356 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.49527227878570557, + "learning_rate": 2.4053780540696836e-05, + "loss": 0.3276, + "step": 5546, + "teacher_loss": 0.30899083614349365 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.7124514579772949, + "learning_rate": 2.405811768107561e-05, + "loss": 0.4123, + "step": 5547, + "teacher_loss": 0.37899094820022583 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.6522963643074036, + "learning_rate": 2.4062454821454388e-05, + "loss": 0.3292, + "step": 5548, + "teacher_loss": 0.2933151125907898 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.7107315063476562, + "learning_rate": 2.4066791961833166e-05, + "loss": 0.324, + "step": 5549, + "teacher_loss": 0.2810158133506775 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.3425602316856384, + "learning_rate": 2.407112910221194e-05, + "loss": 0.2057, + "step": 5550, + "teacher_loss": 0.1905456930398941 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.2833007872104645, + "learning_rate": 2.4075466242590718e-05, + "loss": 0.2369, + "step": 5551, + "teacher_loss": 0.23179934918880463 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.36231696605682373, + "learning_rate": 2.4079803382969495e-05, + "loss": 0.3214, + "step": 5552, + "teacher_loss": 0.3168398141860962 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.5425717830657959, + "learning_rate": 2.4084140523348273e-05, + "loss": 0.3686, + "step": 5553, + "teacher_loss": 0.3492559790611267 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.4136574864387512, + "learning_rate": 2.408847766372705e-05, + "loss": 0.3536, + "step": 5554, + "teacher_loss": 0.34697774052619934 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.3692624866962433, + "learning_rate": 2.4092814804105828e-05, + "loss": 0.3581, + "step": 5555, + "teacher_loss": 0.35681891441345215 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.5137479901313782, + "learning_rate": 2.4097151944484602e-05, + "loss": 0.2613, + "step": 5556, + "teacher_loss": 0.2332562804222107 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.5401643514633179, + "learning_rate": 2.410148908486338e-05, + "loss": 0.2539, + "step": 5557, + "teacher_loss": 0.2221173644065857 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.4916079640388489, + "learning_rate": 2.4105826225242158e-05, + "loss": 0.2995, + "step": 5558, + "teacher_loss": 0.2781757712364197 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.8625274896621704, + "learning_rate": 2.4110163365620935e-05, + "loss": 0.5247, + "step": 5559, + "teacher_loss": 0.4871985912322998 + }, + { + "compression_loss": 0.0, + "epoch": 1.0, + "label_loss": 0.3087458908557892, + "learning_rate": 2.4114500505999713e-05, + "loss": 0.2391, + "step": 5560, + "teacher_loss": 0.2313242256641388 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.35308146476745605, + "learning_rate": 2.4118837646378487e-05, + "loss": 0.2169, + "step": 5561, + "teacher_loss": 0.20174546539783478 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.50047367811203, + "learning_rate": 2.4123174786757265e-05, + "loss": 0.3105, + "step": 5562, + "teacher_loss": 0.28936219215393066 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.4380897283554077, + "learning_rate": 2.4127511927136042e-05, + "loss": 0.2635, + "step": 5563, + "teacher_loss": 0.24415223300457 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.20104242861270905, + "learning_rate": 2.413184906751482e-05, + "loss": 0.1772, + "step": 5564, + "teacher_loss": 0.17459365725517273 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.5302785038948059, + "learning_rate": 2.4136186207893594e-05, + "loss": 0.2562, + "step": 5565, + "teacher_loss": 0.22577491402626038 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.5072332620620728, + "learning_rate": 2.4140523348272372e-05, + "loss": 0.2476, + "step": 5566, + "teacher_loss": 0.21872252225875854 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.43954703211784363, + "learning_rate": 2.414486048865115e-05, + "loss": 0.2412, + "step": 5567, + "teacher_loss": 0.21916130185127258 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.49317100644111633, + "learning_rate": 2.4149197629029927e-05, + "loss": 0.3291, + "step": 5568, + "teacher_loss": 0.31091898679733276 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.4020460844039917, + "learning_rate": 2.4153534769408705e-05, + "loss": 0.2484, + "step": 5569, + "teacher_loss": 0.23137065768241882 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.5230247974395752, + "learning_rate": 2.4157871909787482e-05, + "loss": 0.2438, + "step": 5570, + "teacher_loss": 0.2128157615661621 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.7183653116226196, + "learning_rate": 2.416220905016626e-05, + "loss": 0.3636, + "step": 5571, + "teacher_loss": 0.3242231011390686 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.19906014204025269, + "learning_rate": 2.4166546190545034e-05, + "loss": 0.1815, + "step": 5572, + "teacher_loss": 0.1795075386762619 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.7087011337280273, + "learning_rate": 2.4170883330923812e-05, + "loss": 0.3784, + "step": 5573, + "teacher_loss": 0.34170466661453247 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.3014792501926422, + "learning_rate": 2.4175220471302586e-05, + "loss": 0.2922, + "step": 5574, + "teacher_loss": 0.29114097356796265 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.49453943967819214, + "learning_rate": 2.4179557611681364e-05, + "loss": 0.2187, + "step": 5575, + "teacher_loss": 0.18801945447921753 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.650495171546936, + "learning_rate": 2.418389475206014e-05, + "loss": 0.2932, + "step": 5576, + "teacher_loss": 0.2535434067249298 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.44322168827056885, + "learning_rate": 2.418823189243892e-05, + "loss": 0.285, + "step": 5577, + "teacher_loss": 0.2674024701118469 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.3634362816810608, + "learning_rate": 2.4192569032817697e-05, + "loss": 0.2918, + "step": 5578, + "teacher_loss": 0.28381985425949097 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.4401315152645111, + "learning_rate": 2.4196906173196474e-05, + "loss": 0.3111, + "step": 5579, + "teacher_loss": 0.2967928946018219 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 1.008434772491455, + "learning_rate": 2.4201243313575252e-05, + "loss": 0.4831, + "step": 5580, + "teacher_loss": 0.4247262179851532 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.47278016805648804, + "learning_rate": 2.420558045395403e-05, + "loss": 0.2873, + "step": 5581, + "teacher_loss": 0.26672977209091187 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.18486978113651276, + "learning_rate": 2.4209917594332807e-05, + "loss": 0.2154, + "step": 5582, + "teacher_loss": 0.2188088297843933 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.30044764280319214, + "learning_rate": 2.4214254734711578e-05, + "loss": 0.3138, + "step": 5583, + "teacher_loss": 0.3152994215488434 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.4646206200122833, + "learning_rate": 2.4218591875090355e-05, + "loss": 0.2068, + "step": 5584, + "teacher_loss": 0.17817914485931396 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.5127899646759033, + "learning_rate": 2.4222929015469133e-05, + "loss": 0.2708, + "step": 5585, + "teacher_loss": 0.24396450817584991 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.94619220495224, + "learning_rate": 2.422726615584791e-05, + "loss": 0.3049, + "step": 5586, + "teacher_loss": 0.2336428165435791 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 1.4700956344604492, + "learning_rate": 2.423160329622669e-05, + "loss": 0.3326, + "step": 5587, + "teacher_loss": 0.20618806779384613 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.37568119168281555, + "learning_rate": 2.4235940436605466e-05, + "loss": 0.2003, + "step": 5588, + "teacher_loss": 0.18084616959095 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.37261492013931274, + "learning_rate": 2.4240277576984244e-05, + "loss": 0.226, + "step": 5589, + "teacher_loss": 0.20975112915039062 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.36511141061782837, + "learning_rate": 2.424461471736302e-05, + "loss": 0.2169, + "step": 5590, + "teacher_loss": 0.20042762160301208 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.2893666625022888, + "learning_rate": 2.4248951857741795e-05, + "loss": 0.1946, + "step": 5591, + "teacher_loss": 0.18404307961463928 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.28168922662734985, + "learning_rate": 2.4253288998120573e-05, + "loss": 0.2315, + "step": 5592, + "teacher_loss": 0.22589761018753052 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.24444711208343506, + "learning_rate": 2.425762613849935e-05, + "loss": 0.2457, + "step": 5593, + "teacher_loss": 0.24585431814193726 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.5088082551956177, + "learning_rate": 2.4261963278878125e-05, + "loss": 0.3146, + "step": 5594, + "teacher_loss": 0.2930651605129242 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.8658096790313721, + "learning_rate": 2.4266300419256903e-05, + "loss": 0.2776, + "step": 5595, + "teacher_loss": 0.21224218606948853 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.46944358944892883, + "learning_rate": 2.427063755963568e-05, + "loss": 0.2972, + "step": 5596, + "teacher_loss": 0.2780410647392273 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.24515071511268616, + "learning_rate": 2.4274974700014458e-05, + "loss": 0.2115, + "step": 5597, + "teacher_loss": 0.20781417191028595 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.5131568312644958, + "learning_rate": 2.4279311840393235e-05, + "loss": 0.3161, + "step": 5598, + "teacher_loss": 0.2942129969596863 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.16224858164787292, + "learning_rate": 2.4283648980772013e-05, + "loss": 0.2171, + "step": 5599, + "teacher_loss": 0.22315669059753418 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.5373561978340149, + "learning_rate": 2.4287986121150787e-05, + "loss": 0.3193, + "step": 5600, + "teacher_loss": 0.2950502932071686 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.3081502616405487, + "learning_rate": 2.4292323261529565e-05, + "loss": 0.2329, + "step": 5601, + "teacher_loss": 0.2245505005121231 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.2721555531024933, + "learning_rate": 2.4296660401908343e-05, + "loss": 0.2149, + "step": 5602, + "teacher_loss": 0.20850205421447754 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.15866847336292267, + "learning_rate": 2.430099754228712e-05, + "loss": 0.1623, + "step": 5603, + "teacher_loss": 0.16268372535705566 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.497715562582016, + "learning_rate": 2.4305334682665898e-05, + "loss": 0.2456, + "step": 5604, + "teacher_loss": 0.21760836243629456 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.2097712755203247, + "learning_rate": 2.4309671823044672e-05, + "loss": 0.1728, + "step": 5605, + "teacher_loss": 0.16873791813850403 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.10559982806444168, + "learning_rate": 2.431400896342345e-05, + "loss": 0.1642, + "step": 5606, + "teacher_loss": 0.17065785825252533 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 1.039673089981079, + "learning_rate": 2.4318346103802227e-05, + "loss": 0.3897, + "step": 5607, + "teacher_loss": 0.3174961805343628 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.39323747158050537, + "learning_rate": 2.4322683244181005e-05, + "loss": 0.1814, + "step": 5608, + "teacher_loss": 0.15790067613124847 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.33837801218032837, + "learning_rate": 2.432702038455978e-05, + "loss": 0.2382, + "step": 5609, + "teacher_loss": 0.2270343005657196 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.33296412229537964, + "learning_rate": 2.4331357524938557e-05, + "loss": 0.2252, + "step": 5610, + "teacher_loss": 0.21317672729492188 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 1.2495427131652832, + "learning_rate": 2.4335694665317334e-05, + "loss": 0.3054, + "step": 5611, + "teacher_loss": 0.2005338817834854 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.4443269968032837, + "learning_rate": 2.4340031805696112e-05, + "loss": 0.2469, + "step": 5612, + "teacher_loss": 0.22500471770763397 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.7357381582260132, + "learning_rate": 2.434436894607489e-05, + "loss": 0.8905, + "step": 5613, + "teacher_loss": 0.9077329635620117 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 1.0446408987045288, + "learning_rate": 2.4348706086453667e-05, + "loss": 0.366, + "step": 5614, + "teacher_loss": 0.2905818223953247 + }, + { + "compression_loss": 0.0, + "epoch": 1.01, + "label_loss": 0.36794453859329224, + "learning_rate": 2.4353043226832445e-05, + "loss": 0.201, + "step": 5615, + "teacher_loss": 0.18245071172714233 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.6399587988853455, + "learning_rate": 2.435738036721122e-05, + "loss": 0.3256, + "step": 5616, + "teacher_loss": 0.2906665802001953 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.30041027069091797, + "learning_rate": 2.4361717507589997e-05, + "loss": 0.2231, + "step": 5617, + "teacher_loss": 0.21447762846946716 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.6319758892059326, + "learning_rate": 2.436605464796877e-05, + "loss": 0.2338, + "step": 5618, + "teacher_loss": 0.1895652413368225 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.5243546962738037, + "learning_rate": 2.437039178834755e-05, + "loss": 0.2638, + "step": 5619, + "teacher_loss": 0.2348591387271881 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.28889650106430054, + "learning_rate": 2.4374728928726326e-05, + "loss": 0.2258, + "step": 5620, + "teacher_loss": 0.21874016523361206 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.8411446809768677, + "learning_rate": 2.4379066069105104e-05, + "loss": 0.4277, + "step": 5621, + "teacher_loss": 0.38174301385879517 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.4184070825576782, + "learning_rate": 2.438340320948388e-05, + "loss": 0.4005, + "step": 5622, + "teacher_loss": 0.3984929919242859 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.2530801296234131, + "learning_rate": 2.438774034986266e-05, + "loss": 0.2229, + "step": 5623, + "teacher_loss": 0.2195446491241455 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.5007804036140442, + "learning_rate": 2.4392077490241437e-05, + "loss": 0.4297, + "step": 5624, + "teacher_loss": 0.42185771465301514 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.674731969833374, + "learning_rate": 2.4396414630620214e-05, + "loss": 0.2834, + "step": 5625, + "teacher_loss": 0.2399139553308487 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.5432819128036499, + "learning_rate": 2.4400751770998992e-05, + "loss": 0.2931, + "step": 5626, + "teacher_loss": 0.26524823904037476 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 1.1695358753204346, + "learning_rate": 2.4405088911377763e-05, + "loss": 0.5252, + "step": 5627, + "teacher_loss": 0.45355552434921265 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.5249483585357666, + "learning_rate": 2.440942605175654e-05, + "loss": 0.2616, + "step": 5628, + "teacher_loss": 0.2323458343744278 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.48556044697761536, + "learning_rate": 2.4413763192135318e-05, + "loss": 0.2638, + "step": 5629, + "teacher_loss": 0.23915642499923706 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.5114362239837646, + "learning_rate": 2.4418100332514096e-05, + "loss": 0.2799, + "step": 5630, + "teacher_loss": 0.2541220486164093 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.32199379801750183, + "learning_rate": 2.4422437472892873e-05, + "loss": 0.1888, + "step": 5631, + "teacher_loss": 0.1740495264530182 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 1.7694306373596191, + "learning_rate": 2.442677461327165e-05, + "loss": 0.7707, + "step": 5632, + "teacher_loss": 0.6597065329551697 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.38596707582473755, + "learning_rate": 2.443111175365043e-05, + "loss": 0.2196, + "step": 5633, + "teacher_loss": 0.20110058784484863 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.4178635776042938, + "learning_rate": 2.4435448894029206e-05, + "loss": 0.1697, + "step": 5634, + "teacher_loss": 0.14209628105163574 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.47275400161743164, + "learning_rate": 2.443978603440798e-05, + "loss": 0.2192, + "step": 5635, + "teacher_loss": 0.19101710617542267 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.508428692817688, + "learning_rate": 2.4444123174786758e-05, + "loss": 0.2112, + "step": 5636, + "teacher_loss": 0.17816904187202454 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.2387896329164505, + "learning_rate": 2.4448460315165536e-05, + "loss": 0.1789, + "step": 5637, + "teacher_loss": 0.17229697108268738 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.2613878846168518, + "learning_rate": 2.445279745554431e-05, + "loss": 0.2016, + "step": 5638, + "teacher_loss": 0.19499284029006958 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.47389528155326843, + "learning_rate": 2.4457134595923087e-05, + "loss": 0.2429, + "step": 5639, + "teacher_loss": 0.21727406978607178 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.6177856922149658, + "learning_rate": 2.4461471736301865e-05, + "loss": 0.3746, + "step": 5640, + "teacher_loss": 0.3475687503814697 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.44690102338790894, + "learning_rate": 2.4465808876680643e-05, + "loss": 0.298, + "step": 5641, + "teacher_loss": 0.2814851403236389 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.42904990911483765, + "learning_rate": 2.447014601705942e-05, + "loss": 0.2846, + "step": 5642, + "teacher_loss": 0.26853519678115845 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.39792031049728394, + "learning_rate": 2.4474483157438198e-05, + "loss": 0.238, + "step": 5643, + "teacher_loss": 0.22022143006324768 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.5822258591651917, + "learning_rate": 2.4478820297816972e-05, + "loss": 0.2443, + "step": 5644, + "teacher_loss": 0.2067420929670334 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.5057730674743652, + "learning_rate": 2.448315743819575e-05, + "loss": 0.2146, + "step": 5645, + "teacher_loss": 0.18219396471977234 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.4598786234855652, + "learning_rate": 2.4487494578574527e-05, + "loss": 0.2756, + "step": 5646, + "teacher_loss": 0.25508224964141846 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.47035300731658936, + "learning_rate": 2.4491831718953305e-05, + "loss": 0.3042, + "step": 5647, + "teacher_loss": 0.28577524423599243 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 1.4768092632293701, + "learning_rate": 2.449616885933208e-05, + "loss": 0.8271, + "step": 5648, + "teacher_loss": 0.7549407482147217 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.5571060180664062, + "learning_rate": 2.4500505999710857e-05, + "loss": 0.2588, + "step": 5649, + "teacher_loss": 0.22560936212539673 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.27670279145240784, + "learning_rate": 2.4504843140089635e-05, + "loss": 0.213, + "step": 5650, + "teacher_loss": 0.20595984160900116 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.24549859762191772, + "learning_rate": 2.4509180280468412e-05, + "loss": 0.2625, + "step": 5651, + "teacher_loss": 0.2643560767173767 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.4431856870651245, + "learning_rate": 2.451351742084719e-05, + "loss": 0.2069, + "step": 5652, + "teacher_loss": 0.18060001730918884 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.13577072322368622, + "learning_rate": 2.4517854561225964e-05, + "loss": 0.1649, + "step": 5653, + "teacher_loss": 0.1681235283613205 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.4667758047580719, + "learning_rate": 2.452219170160474e-05, + "loss": 0.2784, + "step": 5654, + "teacher_loss": 0.25750458240509033 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.3781157433986664, + "learning_rate": 2.452652884198352e-05, + "loss": 0.1923, + "step": 5655, + "teacher_loss": 0.1716112494468689 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.6267328262329102, + "learning_rate": 2.4530865982362297e-05, + "loss": 0.3008, + "step": 5656, + "teacher_loss": 0.2645624279975891 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.35940712690353394, + "learning_rate": 2.4535203122741074e-05, + "loss": 0.3256, + "step": 5657, + "teacher_loss": 0.32188737392425537 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.2657119631767273, + "learning_rate": 2.4539540263119852e-05, + "loss": 0.21, + "step": 5658, + "teacher_loss": 0.20382657647132874 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.5109708905220032, + "learning_rate": 2.4543877403498626e-05, + "loss": 0.2585, + "step": 5659, + "teacher_loss": 0.2304304838180542 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.3855929970741272, + "learning_rate": 2.4548214543877404e-05, + "loss": 0.3898, + "step": 5660, + "teacher_loss": 0.39021727442741394 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.454128235578537, + "learning_rate": 2.455255168425618e-05, + "loss": 0.2615, + "step": 5661, + "teacher_loss": 0.24013438820838928 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.41541045904159546, + "learning_rate": 2.4556888824634956e-05, + "loss": 0.2798, + "step": 5662, + "teacher_loss": 0.2647482752799988 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.4303972125053406, + "learning_rate": 2.4561225965013733e-05, + "loss": 0.2202, + "step": 5663, + "teacher_loss": 0.19681212306022644 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.30703192949295044, + "learning_rate": 2.456556310539251e-05, + "loss": 0.2182, + "step": 5664, + "teacher_loss": 0.20830154418945312 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.3043895363807678, + "learning_rate": 2.456990024577129e-05, + "loss": 0.2124, + "step": 5665, + "teacher_loss": 0.20221829414367676 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.37489616870880127, + "learning_rate": 2.4574237386150066e-05, + "loss": 0.2544, + "step": 5666, + "teacher_loss": 0.24095915257930756 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.5221413969993591, + "learning_rate": 2.4578574526528844e-05, + "loss": 0.2835, + "step": 5667, + "teacher_loss": 0.2569735050201416 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 1.3497745990753174, + "learning_rate": 2.458291166690762e-05, + "loss": 0.408, + "step": 5668, + "teacher_loss": 0.3033583164215088 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.2913971543312073, + "learning_rate": 2.45872488072864e-05, + "loss": 0.2414, + "step": 5669, + "teacher_loss": 0.23583179712295532 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.7599729299545288, + "learning_rate": 2.4591585947665173e-05, + "loss": 0.2996, + "step": 5670, + "teacher_loss": 0.2484346181154251 + }, + { + "compression_loss": 0.0, + "epoch": 1.02, + "label_loss": 0.3934428095817566, + "learning_rate": 2.4595923088043948e-05, + "loss": 0.1714, + "step": 5671, + "teacher_loss": 0.1467587947845459 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.3594421446323395, + "learning_rate": 2.4600260228422725e-05, + "loss": 0.215, + "step": 5672, + "teacher_loss": 0.1989109218120575 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.36442431807518005, + "learning_rate": 2.4604597368801503e-05, + "loss": 0.2087, + "step": 5673, + "teacher_loss": 0.19134564697742462 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.3963119387626648, + "learning_rate": 2.460893450918028e-05, + "loss": 0.2275, + "step": 5674, + "teacher_loss": 0.208769753575325 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.4182215929031372, + "learning_rate": 2.4613271649559058e-05, + "loss": 0.3111, + "step": 5675, + "teacher_loss": 0.2992406189441681 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.4566314220428467, + "learning_rate": 2.4617608789937836e-05, + "loss": 0.2726, + "step": 5676, + "teacher_loss": 0.2522006630897522 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 1.3192243576049805, + "learning_rate": 2.4621945930316613e-05, + "loss": 0.6681, + "step": 5677, + "teacher_loss": 0.5957131385803223 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.4689609408378601, + "learning_rate": 2.462628307069539e-05, + "loss": 0.2346, + "step": 5678, + "teacher_loss": 0.20852145552635193 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.4360184073448181, + "learning_rate": 2.4630620211074165e-05, + "loss": 0.2814, + "step": 5679, + "teacher_loss": 0.26425686478614807 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.38606777787208557, + "learning_rate": 2.4634957351452943e-05, + "loss": 0.216, + "step": 5680, + "teacher_loss": 0.19708159565925598 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.2852972149848938, + "learning_rate": 2.4639294491831717e-05, + "loss": 0.2439, + "step": 5681, + "teacher_loss": 0.23929661512374878 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.2817647457122803, + "learning_rate": 2.4643631632210495e-05, + "loss": 0.1758, + "step": 5682, + "teacher_loss": 0.16400158405303955 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.6942086219787598, + "learning_rate": 2.4647968772589272e-05, + "loss": 0.6155, + "step": 5683, + "teacher_loss": 0.6067664623260498 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.4805641770362854, + "learning_rate": 2.465230591296805e-05, + "loss": 0.2766, + "step": 5684, + "teacher_loss": 0.2539036273956299 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.3763279914855957, + "learning_rate": 2.4656643053346828e-05, + "loss": 0.2064, + "step": 5685, + "teacher_loss": 0.18757084012031555 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.35183706879615784, + "learning_rate": 2.4660980193725605e-05, + "loss": 0.1977, + "step": 5686, + "teacher_loss": 0.1805717945098877 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.1790233552455902, + "learning_rate": 2.4665317334104383e-05, + "loss": 0.1731, + "step": 5687, + "teacher_loss": 0.17249321937561035 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.4301919937133789, + "learning_rate": 2.4669654474483157e-05, + "loss": 0.2651, + "step": 5688, + "teacher_loss": 0.2467479407787323 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.3238109350204468, + "learning_rate": 2.4673991614861935e-05, + "loss": 0.1968, + "step": 5689, + "teacher_loss": 0.18274295330047607 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.5029712319374084, + "learning_rate": 2.4678328755240712e-05, + "loss": 0.2557, + "step": 5690, + "teacher_loss": 0.22824493050575256 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.26752063632011414, + "learning_rate": 2.468266589561949e-05, + "loss": 0.4595, + "step": 5691, + "teacher_loss": 0.4807853102684021 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.6211721301078796, + "learning_rate": 2.4687003035998264e-05, + "loss": 0.2288, + "step": 5692, + "teacher_loss": 0.1851685643196106 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.5095622539520264, + "learning_rate": 2.4691340176377042e-05, + "loss": 0.2136, + "step": 5693, + "teacher_loss": 0.1807289570569992 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.15095245838165283, + "learning_rate": 2.469567731675582e-05, + "loss": 0.1858, + "step": 5694, + "teacher_loss": 0.1896858662366867 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.3545226752758026, + "learning_rate": 2.4700014457134597e-05, + "loss": 0.2592, + "step": 5695, + "teacher_loss": 0.24865896999835968 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.28454339504241943, + "learning_rate": 2.4704351597513375e-05, + "loss": 0.2664, + "step": 5696, + "teacher_loss": 0.2644324004650116 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.1250162273645401, + "learning_rate": 2.470868873789215e-05, + "loss": 0.1747, + "step": 5697, + "teacher_loss": 0.18018421530723572 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.5807621479034424, + "learning_rate": 2.4713025878270926e-05, + "loss": 0.2632, + "step": 5698, + "teacher_loss": 0.22794674336910248 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.4761284291744232, + "learning_rate": 2.4717363018649704e-05, + "loss": 0.2414, + "step": 5699, + "teacher_loss": 0.21528282761573792 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.6427435278892517, + "learning_rate": 2.4721700159028482e-05, + "loss": 0.2438, + "step": 5700, + "teacher_loss": 0.19942112267017365 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.40813300013542175, + "learning_rate": 2.472603729940726e-05, + "loss": 0.2141, + "step": 5701, + "teacher_loss": 0.1925884187221527 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.17408955097198486, + "learning_rate": 2.4730374439786037e-05, + "loss": 0.2154, + "step": 5702, + "teacher_loss": 0.2200450301170349 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.279409795999527, + "learning_rate": 2.473471158016481e-05, + "loss": 0.1698, + "step": 5703, + "teacher_loss": 0.15761703252792358 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.5663573145866394, + "learning_rate": 2.473904872054359e-05, + "loss": 0.2847, + "step": 5704, + "teacher_loss": 0.2534576654434204 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.39080384373664856, + "learning_rate": 2.4743385860922366e-05, + "loss": 0.2409, + "step": 5705, + "teacher_loss": 0.2242712676525116 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.33358481526374817, + "learning_rate": 2.474772300130114e-05, + "loss": 0.2619, + "step": 5706, + "teacher_loss": 0.253900408744812 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.8228200674057007, + "learning_rate": 2.4752060141679918e-05, + "loss": 0.2874, + "step": 5707, + "teacher_loss": 0.22791078686714172 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.34778741002082825, + "learning_rate": 2.4756397282058696e-05, + "loss": 0.2924, + "step": 5708, + "teacher_loss": 0.2862427234649658 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.6013121008872986, + "learning_rate": 2.4760734422437474e-05, + "loss": 0.3065, + "step": 5709, + "teacher_loss": 0.27379727363586426 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.8529046773910522, + "learning_rate": 2.476507156281625e-05, + "loss": 0.4739, + "step": 5710, + "teacher_loss": 0.4317663908004761 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.37333786487579346, + "learning_rate": 2.476940870319503e-05, + "loss": 0.263, + "step": 5711, + "teacher_loss": 0.2507673501968384 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.33566492795944214, + "learning_rate": 2.4773745843573806e-05, + "loss": 0.198, + "step": 5712, + "teacher_loss": 0.1826917976140976 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.614032506942749, + "learning_rate": 2.4778082983952584e-05, + "loss": 0.3004, + "step": 5713, + "teacher_loss": 0.26554590463638306 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.7869497537612915, + "learning_rate": 2.4782420124331358e-05, + "loss": 0.3549, + "step": 5714, + "teacher_loss": 0.3068962097167969 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.19687432050704956, + "learning_rate": 2.4786757264710133e-05, + "loss": 0.1788, + "step": 5715, + "teacher_loss": 0.17675325274467468 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.5800275802612305, + "learning_rate": 2.479109440508891e-05, + "loss": 0.3885, + "step": 5716, + "teacher_loss": 0.36716651916503906 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.5742631554603577, + "learning_rate": 2.4795431545467688e-05, + "loss": 0.315, + "step": 5717, + "teacher_loss": 0.28618019819259644 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.5100072026252747, + "learning_rate": 2.4799768685846465e-05, + "loss": 0.2694, + "step": 5718, + "teacher_loss": 0.24268606305122375 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.5326964855194092, + "learning_rate": 2.4804105826225243e-05, + "loss": 0.2802, + "step": 5719, + "teacher_loss": 0.2521963119506836 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.1128014326095581, + "learning_rate": 2.480844296660402e-05, + "loss": 0.1637, + "step": 5720, + "teacher_loss": 0.16936154663562775 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.35093826055526733, + "learning_rate": 2.4812780106982798e-05, + "loss": 0.3122, + "step": 5721, + "teacher_loss": 0.30794641375541687 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.21749761700630188, + "learning_rate": 2.4817117247361576e-05, + "loss": 0.2036, + "step": 5722, + "teacher_loss": 0.20202505588531494 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.12070222198963165, + "learning_rate": 2.4821454387740353e-05, + "loss": 0.1463, + "step": 5723, + "teacher_loss": 0.14916247129440308 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.15647193789482117, + "learning_rate": 2.4825791528119128e-05, + "loss": 0.1674, + "step": 5724, + "teacher_loss": 0.1686602532863617 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.5443297624588013, + "learning_rate": 2.4830128668497902e-05, + "loss": 0.3033, + "step": 5725, + "teacher_loss": 0.27650344371795654 + }, + { + "compression_loss": 0.0, + "epoch": 1.03, + "label_loss": 0.31578707695007324, + "learning_rate": 2.483446580887668e-05, + "loss": 0.1934, + "step": 5726, + "teacher_loss": 0.1797824501991272 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.3873903155326843, + "learning_rate": 2.4838802949255457e-05, + "loss": 0.2784, + "step": 5727, + "teacher_loss": 0.2663387954235077 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.3121715784072876, + "learning_rate": 2.4843140089634235e-05, + "loss": 0.2797, + "step": 5728, + "teacher_loss": 0.27609914541244507 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.4188960790634155, + "learning_rate": 2.4847477230013012e-05, + "loss": 0.3351, + "step": 5729, + "teacher_loss": 0.3258160352706909 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.5170630216598511, + "learning_rate": 2.485181437039179e-05, + "loss": 0.2877, + "step": 5730, + "teacher_loss": 0.262221097946167 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.4651789367198944, + "learning_rate": 2.4856151510770568e-05, + "loss": 0.2017, + "step": 5731, + "teacher_loss": 0.17238225042819977 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.271403968334198, + "learning_rate": 2.4860488651149342e-05, + "loss": 0.273, + "step": 5732, + "teacher_loss": 0.2731597125530243 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.44333982467651367, + "learning_rate": 2.486482579152812e-05, + "loss": 0.274, + "step": 5733, + "teacher_loss": 0.2551373839378357 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.3212038278579712, + "learning_rate": 2.4869162931906897e-05, + "loss": 0.2446, + "step": 5734, + "teacher_loss": 0.23614084720611572 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.5660072565078735, + "learning_rate": 2.4873500072285675e-05, + "loss": 0.2539, + "step": 5735, + "teacher_loss": 0.21925979852676392 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.7995697855949402, + "learning_rate": 2.487783721266445e-05, + "loss": 0.3267, + "step": 5736, + "teacher_loss": 0.2742080092430115 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.3201525807380676, + "learning_rate": 2.4882174353043227e-05, + "loss": 0.2086, + "step": 5737, + "teacher_loss": 0.19622795283794403 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.34195366501808167, + "learning_rate": 2.4886511493422004e-05, + "loss": 0.2218, + "step": 5738, + "teacher_loss": 0.20847046375274658 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 1.2613153457641602, + "learning_rate": 2.4890848633800782e-05, + "loss": 0.335, + "step": 5739, + "teacher_loss": 0.23206044733524323 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.4662017524242401, + "learning_rate": 2.489518577417956e-05, + "loss": 0.2837, + "step": 5740, + "teacher_loss": 0.263388067483902 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.4382407069206238, + "learning_rate": 2.4899522914558334e-05, + "loss": 0.2052, + "step": 5741, + "teacher_loss": 0.1792682707309723 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.6544212102890015, + "learning_rate": 2.490386005493711e-05, + "loss": 0.4402, + "step": 5742, + "teacher_loss": 0.41640496253967285 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.5321322083473206, + "learning_rate": 2.490819719531589e-05, + "loss": 0.2177, + "step": 5743, + "teacher_loss": 0.18279704451560974 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.4202308654785156, + "learning_rate": 2.4912534335694667e-05, + "loss": 0.3667, + "step": 5744, + "teacher_loss": 0.3607712984085083 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.6600340008735657, + "learning_rate": 2.4916871476073444e-05, + "loss": 0.3302, + "step": 5745, + "teacher_loss": 0.29359379410743713 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.5137569904327393, + "learning_rate": 2.492120861645222e-05, + "loss": 0.1858, + "step": 5746, + "teacher_loss": 0.14932073652744293 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.7411439418792725, + "learning_rate": 2.4925545756830996e-05, + "loss": 0.2865, + "step": 5747, + "teacher_loss": 0.23600199818611145 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.32975006103515625, + "learning_rate": 2.4929882897209774e-05, + "loss": 0.2187, + "step": 5748, + "teacher_loss": 0.20637497305870056 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.2602580487728119, + "learning_rate": 2.493422003758855e-05, + "loss": 0.2835, + "step": 5749, + "teacher_loss": 0.28605425357818604 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.17854523658752441, + "learning_rate": 2.4938557177967326e-05, + "loss": 0.1978, + "step": 5750, + "teacher_loss": 0.19992607831954956 + }, + { + "epoch": 1.04, + "eval_exact_match": 79.28098391674551, + "eval_f1": 86.92138248670703, + "step": 5750 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.7560309171676636, + "learning_rate": 2.4942894318346103e-05, + "loss": 0.3831, + "step": 5751, + "teacher_loss": 0.34162116050720215 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.386283814907074, + "learning_rate": 2.494723145872488e-05, + "loss": 0.3137, + "step": 5752, + "teacher_loss": 0.3056256175041199 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.8294663429260254, + "learning_rate": 2.495156859910366e-05, + "loss": 0.3401, + "step": 5753, + "teacher_loss": 0.2857303023338318 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.5904160141944885, + "learning_rate": 2.4955905739482436e-05, + "loss": 0.2127, + "step": 5754, + "teacher_loss": 0.17068493366241455 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.2206193208694458, + "learning_rate": 2.4960242879861214e-05, + "loss": 0.2853, + "step": 5755, + "teacher_loss": 0.29246532917022705 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.3550376892089844, + "learning_rate": 2.496458002023999e-05, + "loss": 0.192, + "step": 5756, + "teacher_loss": 0.1738990843296051 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.1980118453502655, + "learning_rate": 2.4968917160618766e-05, + "loss": 0.1599, + "step": 5757, + "teacher_loss": 0.1556473821401596 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.6552790999412537, + "learning_rate": 2.4973254300997543e-05, + "loss": 0.2733, + "step": 5758, + "teacher_loss": 0.23090949654579163 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.39710724353790283, + "learning_rate": 2.4977591441376317e-05, + "loss": 0.2731, + "step": 5759, + "teacher_loss": 0.259337842464447 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.5608937740325928, + "learning_rate": 2.4981928581755095e-05, + "loss": 0.3201, + "step": 5760, + "teacher_loss": 0.29334133863449097 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.36709702014923096, + "learning_rate": 2.4986265722133873e-05, + "loss": 0.2884, + "step": 5761, + "teacher_loss": 0.2796553671360016 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.5096365213394165, + "learning_rate": 2.499060286251265e-05, + "loss": 0.326, + "step": 5762, + "teacher_loss": 0.3056340515613556 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.6187939643859863, + "learning_rate": 2.4994940002891428e-05, + "loss": 0.2263, + "step": 5763, + "teacher_loss": 0.18266212940216064 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.472749799489975, + "learning_rate": 2.4999277143270205e-05, + "loss": 0.2872, + "step": 5764, + "teacher_loss": 0.2665541470050812 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.14188289642333984, + "learning_rate": 2.5003614283648983e-05, + "loss": 0.1839, + "step": 5765, + "teacher_loss": 0.18857935070991516 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.8677908182144165, + "learning_rate": 2.500795142402776e-05, + "loss": 0.4434, + "step": 5766, + "teacher_loss": 0.39629605412483215 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.33384984731674194, + "learning_rate": 2.501228856440654e-05, + "loss": 0.27, + "step": 5767, + "teacher_loss": 0.26286375522613525 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.3554983139038086, + "learning_rate": 2.501662570478531e-05, + "loss": 0.1878, + "step": 5768, + "teacher_loss": 0.1691453605890274 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.46972525119781494, + "learning_rate": 2.5020962845164087e-05, + "loss": 0.2699, + "step": 5769, + "teacher_loss": 0.2476898580789566 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.43315064907073975, + "learning_rate": 2.5025299985542864e-05, + "loss": 0.2426, + "step": 5770, + "teacher_loss": 0.2214576005935669 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.4194457530975342, + "learning_rate": 2.5029637125921642e-05, + "loss": 0.2931, + "step": 5771, + "teacher_loss": 0.2790180444717407 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.18961502611637115, + "learning_rate": 2.503397426630042e-05, + "loss": 0.1546, + "step": 5772, + "teacher_loss": 0.15073883533477783 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.8729780912399292, + "learning_rate": 2.5038311406679197e-05, + "loss": 0.3194, + "step": 5773, + "teacher_loss": 0.2578945755958557 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.45669692754745483, + "learning_rate": 2.5042648547057975e-05, + "loss": 0.3062, + "step": 5774, + "teacher_loss": 0.2894551157951355 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.32803791761398315, + "learning_rate": 2.5046985687436753e-05, + "loss": 0.2683, + "step": 5775, + "teacher_loss": 0.2616109251976013 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.6847606897354126, + "learning_rate": 2.5051322827815527e-05, + "loss": 0.3203, + "step": 5776, + "teacher_loss": 0.2797488570213318 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.7289848327636719, + "learning_rate": 2.5055659968194304e-05, + "loss": 0.3537, + "step": 5777, + "teacher_loss": 0.3120438754558563 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.4425666630268097, + "learning_rate": 2.5059997108573082e-05, + "loss": 0.3525, + "step": 5778, + "teacher_loss": 0.342507541179657 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.643241822719574, + "learning_rate": 2.5064334248951856e-05, + "loss": 0.285, + "step": 5779, + "teacher_loss": 0.24525006115436554 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.47755759954452515, + "learning_rate": 2.5068671389330634e-05, + "loss": 0.3808, + "step": 5780, + "teacher_loss": 0.37009796500205994 + }, + { + "compression_loss": 0.0, + "epoch": 1.04, + "label_loss": 0.3420846462249756, + "learning_rate": 2.507300852970941e-05, + "loss": 0.1777, + "step": 5781, + "teacher_loss": 0.15946078300476074 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.20883983373641968, + "learning_rate": 2.507734567008819e-05, + "loss": 0.2447, + "step": 5782, + "teacher_loss": 0.24867497384548187 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.5636640787124634, + "learning_rate": 2.5081682810466967e-05, + "loss": 0.2474, + "step": 5783, + "teacher_loss": 0.21228128671646118 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.3770233988761902, + "learning_rate": 2.5086019950845744e-05, + "loss": 0.2283, + "step": 5784, + "teacher_loss": 0.21182557940483093 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.4271715581417084, + "learning_rate": 2.509035709122452e-05, + "loss": 0.2639, + "step": 5785, + "teacher_loss": 0.24580763280391693 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.5708246231079102, + "learning_rate": 2.5094694231603296e-05, + "loss": 0.3439, + "step": 5786, + "teacher_loss": 0.31866660714149475 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.2131984531879425, + "learning_rate": 2.5099031371982074e-05, + "loss": 0.191, + "step": 5787, + "teacher_loss": 0.18852703273296356 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.7692008018493652, + "learning_rate": 2.510336851236085e-05, + "loss": 0.2798, + "step": 5788, + "teacher_loss": 0.22544288635253906 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.7904120683670044, + "learning_rate": 2.510770565273963e-05, + "loss": 0.4143, + "step": 5789, + "teacher_loss": 0.37247389554977417 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.35019925236701965, + "learning_rate": 2.5112042793118403e-05, + "loss": 0.2063, + "step": 5790, + "teacher_loss": 0.1903364360332489 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.1626065969467163, + "learning_rate": 2.511637993349718e-05, + "loss": 0.174, + "step": 5791, + "teacher_loss": 0.175270214676857 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.4408935606479645, + "learning_rate": 2.512071707387596e-05, + "loss": 0.2522, + "step": 5792, + "teacher_loss": 0.23125328123569489 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.564699113368988, + "learning_rate": 2.5125054214254736e-05, + "loss": 0.2785, + "step": 5793, + "teacher_loss": 0.24667063355445862 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.22682073712348938, + "learning_rate": 2.512939135463351e-05, + "loss": 0.2326, + "step": 5794, + "teacher_loss": 0.2332293838262558 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.7990626096725464, + "learning_rate": 2.5133728495012288e-05, + "loss": 0.3527, + "step": 5795, + "teacher_loss": 0.3031332492828369 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.23610055446624756, + "learning_rate": 2.5138065635391066e-05, + "loss": 0.21, + "step": 5796, + "teacher_loss": 0.20705491304397583 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.6262083053588867, + "learning_rate": 2.5142402775769843e-05, + "loss": 0.4034, + "step": 5797, + "teacher_loss": 0.3786849081516266 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.32296305894851685, + "learning_rate": 2.514673991614862e-05, + "loss": 0.2141, + "step": 5798, + "teacher_loss": 0.2019776850938797 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 1.28407883644104, + "learning_rate": 2.51510770565274e-05, + "loss": 0.3252, + "step": 5799, + "teacher_loss": 0.21862280368804932 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.6629778742790222, + "learning_rate": 2.5155414196906176e-05, + "loss": 0.3525, + "step": 5800, + "teacher_loss": 0.3180267810821533 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.5553765892982483, + "learning_rate": 2.515975133728495e-05, + "loss": 0.2279, + "step": 5801, + "teacher_loss": 0.19147717952728271 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.46481117606163025, + "learning_rate": 2.5164088477663728e-05, + "loss": 0.2298, + "step": 5802, + "teacher_loss": 0.20370006561279297 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.4750494062900543, + "learning_rate": 2.5168425618042502e-05, + "loss": 0.2226, + "step": 5803, + "teacher_loss": 0.19451621174812317 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.4932330846786499, + "learning_rate": 2.517276275842128e-05, + "loss": 0.2059, + "step": 5804, + "teacher_loss": 0.17402797937393188 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.44331634044647217, + "learning_rate": 2.5177099898800058e-05, + "loss": 0.3422, + "step": 5805, + "teacher_loss": 0.33091285824775696 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.583695650100708, + "learning_rate": 2.5181437039178835e-05, + "loss": 0.288, + "step": 5806, + "teacher_loss": 0.2551591396331787 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.5548825263977051, + "learning_rate": 2.5185774179557613e-05, + "loss": 0.3794, + "step": 5807, + "teacher_loss": 0.35985425114631653 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.2901937961578369, + "learning_rate": 2.519011131993639e-05, + "loss": 0.3196, + "step": 5808, + "teacher_loss": 0.3228548765182495 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.3566325008869171, + "learning_rate": 2.5194448460315168e-05, + "loss": 0.2287, + "step": 5809, + "teacher_loss": 0.2145385593175888 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.1724443882703781, + "learning_rate": 2.5198785600693946e-05, + "loss": 0.1839, + "step": 5810, + "teacher_loss": 0.18521729111671448 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.2794632911682129, + "learning_rate": 2.5203122741072723e-05, + "loss": 0.2557, + "step": 5811, + "teacher_loss": 0.25300610065460205 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.3794358968734741, + "learning_rate": 2.5207459881451494e-05, + "loss": 0.2963, + "step": 5812, + "teacher_loss": 0.2870127558708191 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.442825049161911, + "learning_rate": 2.5211797021830272e-05, + "loss": 0.2058, + "step": 5813, + "teacher_loss": 0.17951908707618713 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.27354946732521057, + "learning_rate": 2.521613416220905e-05, + "loss": 0.2554, + "step": 5814, + "teacher_loss": 0.2534312903881073 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.44062986969947815, + "learning_rate": 2.5220471302587827e-05, + "loss": 0.2418, + "step": 5815, + "teacher_loss": 0.21971175074577332 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.6296927332878113, + "learning_rate": 2.5224808442966605e-05, + "loss": 0.5582, + "step": 5816, + "teacher_loss": 0.5502831935882568 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.29670774936676025, + "learning_rate": 2.5229145583345382e-05, + "loss": 0.2544, + "step": 5817, + "teacher_loss": 0.24974079430103302 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.4668574333190918, + "learning_rate": 2.523348272372416e-05, + "loss": 0.2763, + "step": 5818, + "teacher_loss": 0.25517359375953674 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.28109365701675415, + "learning_rate": 2.5237819864102937e-05, + "loss": 0.2226, + "step": 5819, + "teacher_loss": 0.21614226698875427 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.6486839056015015, + "learning_rate": 2.524215700448171e-05, + "loss": 0.2723, + "step": 5820, + "teacher_loss": 0.2304602712392807 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.3750072121620178, + "learning_rate": 2.524649414486049e-05, + "loss": 0.2947, + "step": 5821, + "teacher_loss": 0.28574198484420776 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.41910210251808167, + "learning_rate": 2.5250831285239267e-05, + "loss": 0.2904, + "step": 5822, + "teacher_loss": 0.2760849893093109 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.7422313690185547, + "learning_rate": 2.525516842561804e-05, + "loss": 0.3744, + "step": 5823, + "teacher_loss": 0.3335673213005066 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.5846238732337952, + "learning_rate": 2.525950556599682e-05, + "loss": 0.2464, + "step": 5824, + "teacher_loss": 0.20882129669189453 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.5884277820587158, + "learning_rate": 2.5263842706375596e-05, + "loss": 0.2111, + "step": 5825, + "teacher_loss": 0.1692180186510086 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.6069173812866211, + "learning_rate": 2.5268179846754374e-05, + "loss": 0.2826, + "step": 5826, + "teacher_loss": 0.2466059923171997 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.3188580870628357, + "learning_rate": 2.527251698713315e-05, + "loss": 0.2104, + "step": 5827, + "teacher_loss": 0.1984032392501831 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.5554224252700806, + "learning_rate": 2.527685412751193e-05, + "loss": 0.4018, + "step": 5828, + "teacher_loss": 0.3847217559814453 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.702427864074707, + "learning_rate": 2.5281191267890703e-05, + "loss": 0.2083, + "step": 5829, + "teacher_loss": 0.15334858000278473 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.31164172291755676, + "learning_rate": 2.528552840826948e-05, + "loss": 0.2783, + "step": 5830, + "teacher_loss": 0.27454712986946106 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.4595423936843872, + "learning_rate": 2.528986554864826e-05, + "loss": 0.2429, + "step": 5831, + "teacher_loss": 0.21886922419071198 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.8257927298545837, + "learning_rate": 2.5294202689027036e-05, + "loss": 0.2968, + "step": 5832, + "teacher_loss": 0.23804402351379395 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.5439439415931702, + "learning_rate": 2.5298539829405814e-05, + "loss": 0.3352, + "step": 5833, + "teacher_loss": 0.31195998191833496 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.35350432991981506, + "learning_rate": 2.5302876969784588e-05, + "loss": 0.2859, + "step": 5834, + "teacher_loss": 0.2783682346343994 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.422508180141449, + "learning_rate": 2.5307214110163366e-05, + "loss": 0.2366, + "step": 5835, + "teacher_loss": 0.21598462760448456 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.641747236251831, + "learning_rate": 2.5311551250542143e-05, + "loss": 0.4979, + "step": 5836, + "teacher_loss": 0.4819698929786682 + }, + { + "compression_loss": 0.0, + "epoch": 1.05, + "label_loss": 0.805057168006897, + "learning_rate": 2.531588839092092e-05, + "loss": 0.4214, + "step": 5837, + "teacher_loss": 0.378801167011261 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.34252700209617615, + "learning_rate": 2.5320225531299695e-05, + "loss": 0.2755, + "step": 5838, + "teacher_loss": 0.2680216431617737 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.205826535820961, + "learning_rate": 2.5324562671678473e-05, + "loss": 0.1454, + "step": 5839, + "teacher_loss": 0.13867482542991638 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.5726264119148254, + "learning_rate": 2.532889981205725e-05, + "loss": 0.3103, + "step": 5840, + "teacher_loss": 0.2811729907989502 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.2583940625190735, + "learning_rate": 2.5333236952436028e-05, + "loss": 0.1791, + "step": 5841, + "teacher_loss": 0.1702657788991928 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.5586618185043335, + "learning_rate": 2.5337574092814806e-05, + "loss": 0.3026, + "step": 5842, + "teacher_loss": 0.2741256654262543 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.471606582403183, + "learning_rate": 2.5341911233193583e-05, + "loss": 0.2436, + "step": 5843, + "teacher_loss": 0.21825124323368073 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.30049288272857666, + "learning_rate": 2.534624837357236e-05, + "loss": 0.249, + "step": 5844, + "teacher_loss": 0.24324896931648254 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.3186829686164856, + "learning_rate": 2.5350585513951135e-05, + "loss": 0.2055, + "step": 5845, + "teacher_loss": 0.19293229281902313 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.7967687845230103, + "learning_rate": 2.5354922654329913e-05, + "loss": 0.3799, + "step": 5846, + "teacher_loss": 0.3335472643375397 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.7419020533561707, + "learning_rate": 2.5359259794708687e-05, + "loss": 0.3289, + "step": 5847, + "teacher_loss": 0.2830204665660858 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.36382153630256653, + "learning_rate": 2.5363596935087465e-05, + "loss": 0.2382, + "step": 5848, + "teacher_loss": 0.2242489457130432 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.32528695464134216, + "learning_rate": 2.5367934075466242e-05, + "loss": 0.4005, + "step": 5849, + "teacher_loss": 0.40881454944610596 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.19113288819789886, + "learning_rate": 2.537227121584502e-05, + "loss": 0.2025, + "step": 5850, + "teacher_loss": 0.20375216007232666 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 1.136780023574829, + "learning_rate": 2.5376608356223798e-05, + "loss": 0.323, + "step": 5851, + "teacher_loss": 0.23253172636032104 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.4320363998413086, + "learning_rate": 2.5380945496602575e-05, + "loss": 0.278, + "step": 5852, + "teacher_loss": 0.2608439326286316 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.42010578513145447, + "learning_rate": 2.5385282636981353e-05, + "loss": 0.2139, + "step": 5853, + "teacher_loss": 0.19101954996585846 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.1719742715358734, + "learning_rate": 2.538961977736013e-05, + "loss": 0.1616, + "step": 5854, + "teacher_loss": 0.16040191054344177 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.3774372637271881, + "learning_rate": 2.5393956917738905e-05, + "loss": 0.2081, + "step": 5855, + "teacher_loss": 0.18931914865970612 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.6080065369606018, + "learning_rate": 2.539829405811768e-05, + "loss": 0.2938, + "step": 5856, + "teacher_loss": 0.2588987648487091 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.41710320115089417, + "learning_rate": 2.5402631198496457e-05, + "loss": 0.2777, + "step": 5857, + "teacher_loss": 0.2621798813343048 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.9423816800117493, + "learning_rate": 2.5406968338875234e-05, + "loss": 0.524, + "step": 5858, + "teacher_loss": 0.4775207042694092 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.3307334780693054, + "learning_rate": 2.5411305479254012e-05, + "loss": 0.3065, + "step": 5859, + "teacher_loss": 0.3037795424461365 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.48244673013687134, + "learning_rate": 2.541564261963279e-05, + "loss": 0.2829, + "step": 5860, + "teacher_loss": 0.26077666878700256 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.49170243740081787, + "learning_rate": 2.5419979760011567e-05, + "loss": 0.3322, + "step": 5861, + "teacher_loss": 0.31453025341033936 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.6568790674209595, + "learning_rate": 2.5424316900390345e-05, + "loss": 0.264, + "step": 5862, + "teacher_loss": 0.2203400433063507 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.41657501459121704, + "learning_rate": 2.5428654040769122e-05, + "loss": 0.2149, + "step": 5863, + "teacher_loss": 0.19251692295074463 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.7967163324356079, + "learning_rate": 2.54329911811479e-05, + "loss": 0.3457, + "step": 5864, + "teacher_loss": 0.29555270075798035 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.19436734914779663, + "learning_rate": 2.5437328321526674e-05, + "loss": 0.2112, + "step": 5865, + "teacher_loss": 0.21310940384864807 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.6193044781684875, + "learning_rate": 2.544166546190545e-05, + "loss": 0.236, + "step": 5866, + "teacher_loss": 0.19336827099323273 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.6635338068008423, + "learning_rate": 2.5446002602284226e-05, + "loss": 0.3074, + "step": 5867, + "teacher_loss": 0.26778626441955566 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.4893947243690491, + "learning_rate": 2.5450339742663004e-05, + "loss": 0.209, + "step": 5868, + "teacher_loss": 0.17782796919345856 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.317904531955719, + "learning_rate": 2.545467688304178e-05, + "loss": 0.2147, + "step": 5869, + "teacher_loss": 0.20328830182552338 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.30255621671676636, + "learning_rate": 2.545901402342056e-05, + "loss": 0.2919, + "step": 5870, + "teacher_loss": 0.2906605899333954 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.5360873937606812, + "learning_rate": 2.5463351163799337e-05, + "loss": 0.4032, + "step": 5871, + "teacher_loss": 0.3884846270084381 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.4596640467643738, + "learning_rate": 2.5467688304178114e-05, + "loss": 0.1922, + "step": 5872, + "teacher_loss": 0.16249053180217743 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.7049658298492432, + "learning_rate": 2.547202544455689e-05, + "loss": 0.3276, + "step": 5873, + "teacher_loss": 0.2856695055961609 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.12569168210029602, + "learning_rate": 2.5476362584935666e-05, + "loss": 0.2239, + "step": 5874, + "teacher_loss": 0.23479053378105164 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.36887308955192566, + "learning_rate": 2.5480699725314444e-05, + "loss": 0.2689, + "step": 5875, + "teacher_loss": 0.257793128490448 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.5283715128898621, + "learning_rate": 2.548503686569322e-05, + "loss": 0.3153, + "step": 5876, + "teacher_loss": 0.2916357219219208 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.7359654903411865, + "learning_rate": 2.5489374006071995e-05, + "loss": 0.2732, + "step": 5877, + "teacher_loss": 0.2217286378145218 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 1.105780005455017, + "learning_rate": 2.5493711146450773e-05, + "loss": 0.3918, + "step": 5878, + "teacher_loss": 0.3124650716781616 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.280916303396225, + "learning_rate": 2.549804828682955e-05, + "loss": 0.2337, + "step": 5879, + "teacher_loss": 0.22843077778816223 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.15347811579704285, + "learning_rate": 2.550238542720833e-05, + "loss": 0.1708, + "step": 5880, + "teacher_loss": 0.172685444355011 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.637710690498352, + "learning_rate": 2.5506722567587106e-05, + "loss": 0.2786, + "step": 5881, + "teacher_loss": 0.23868566751480103 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 1.009932041168213, + "learning_rate": 2.551105970796588e-05, + "loss": 0.3413, + "step": 5882, + "teacher_loss": 0.26702213287353516 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.42496317625045776, + "learning_rate": 2.5515396848344658e-05, + "loss": 0.2825, + "step": 5883, + "teacher_loss": 0.2666654586791992 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.5348008275032043, + "learning_rate": 2.5519733988723435e-05, + "loss": 0.3021, + "step": 5884, + "teacher_loss": 0.27626293897628784 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.2823844850063324, + "learning_rate": 2.5524071129102213e-05, + "loss": 0.2508, + "step": 5885, + "teacher_loss": 0.2472800314426422 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 1.1398496627807617, + "learning_rate": 2.552840826948099e-05, + "loss": 0.3568, + "step": 5886, + "teacher_loss": 0.2697896957397461 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.27828001976013184, + "learning_rate": 2.5532745409859768e-05, + "loss": 0.2207, + "step": 5887, + "teacher_loss": 0.21430817246437073 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.28940826654434204, + "learning_rate": 2.5537082550238543e-05, + "loss": 0.2113, + "step": 5888, + "teacher_loss": 0.20263460278511047 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.31656792759895325, + "learning_rate": 2.554141969061732e-05, + "loss": 0.2262, + "step": 5889, + "teacher_loss": 0.2161426842212677 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.5206784605979919, + "learning_rate": 2.5545756830996098e-05, + "loss": 0.3178, + "step": 5890, + "teacher_loss": 0.2952684760093689 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.48077425360679626, + "learning_rate": 2.5550093971374872e-05, + "loss": 0.2698, + "step": 5891, + "teacher_loss": 0.246351957321167 + }, + { + "compression_loss": 0.0, + "epoch": 1.06, + "label_loss": 0.27060964703559875, + "learning_rate": 2.555443111175365e-05, + "loss": 0.193, + "step": 5892, + "teacher_loss": 0.18432685732841492 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.4760136604309082, + "learning_rate": 2.5558768252132427e-05, + "loss": 0.2941, + "step": 5893, + "teacher_loss": 0.27389171719551086 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.3370354175567627, + "learning_rate": 2.5563105392511205e-05, + "loss": 0.3231, + "step": 5894, + "teacher_loss": 0.32158249616622925 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.22372743487358093, + "learning_rate": 2.5567442532889983e-05, + "loss": 0.2038, + "step": 5895, + "teacher_loss": 0.20161354541778564 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.570643424987793, + "learning_rate": 2.557177967326876e-05, + "loss": 0.2991, + "step": 5896, + "teacher_loss": 0.26888638734817505 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.6290431022644043, + "learning_rate": 2.5576116813647538e-05, + "loss": 0.2893, + "step": 5897, + "teacher_loss": 0.25152814388275146 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.31510812044143677, + "learning_rate": 2.5580453954026315e-05, + "loss": 0.2134, + "step": 5898, + "teacher_loss": 0.20211388170719147 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.12859070301055908, + "learning_rate": 2.558479109440509e-05, + "loss": 0.2, + "step": 5899, + "teacher_loss": 0.20790529251098633 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.3454161584377289, + "learning_rate": 2.5589128234783864e-05, + "loss": 0.2044, + "step": 5900, + "teacher_loss": 0.18876144289970398 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.4632948040962219, + "learning_rate": 2.559346537516264e-05, + "loss": 0.2802, + "step": 5901, + "teacher_loss": 0.25980833172798157 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.5297257304191589, + "learning_rate": 2.559780251554142e-05, + "loss": 0.2383, + "step": 5902, + "teacher_loss": 0.20586737990379333 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.7255809307098389, + "learning_rate": 2.5602139655920197e-05, + "loss": 0.3785, + "step": 5903, + "teacher_loss": 0.3399866223335266 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.4395931363105774, + "learning_rate": 2.5606476796298974e-05, + "loss": 0.4322, + "step": 5904, + "teacher_loss": 0.4313408136367798 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.4237062633037567, + "learning_rate": 2.5610813936677752e-05, + "loss": 0.2627, + "step": 5905, + "teacher_loss": 0.24483072757720947 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.27004578709602356, + "learning_rate": 2.561515107705653e-05, + "loss": 0.2218, + "step": 5906, + "teacher_loss": 0.21646207571029663 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.7203580141067505, + "learning_rate": 2.5619488217435307e-05, + "loss": 0.5771, + "step": 5907, + "teacher_loss": 0.5611433982849121 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.34088563919067383, + "learning_rate": 2.5623825357814085e-05, + "loss": 0.2054, + "step": 5908, + "teacher_loss": 0.19036734104156494 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.6013332605361938, + "learning_rate": 2.562816249819286e-05, + "loss": 0.3419, + "step": 5909, + "teacher_loss": 0.3130248486995697 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.5472279787063599, + "learning_rate": 2.5632499638571633e-05, + "loss": 0.3055, + "step": 5910, + "teacher_loss": 0.2786266803741455 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.4141944646835327, + "learning_rate": 2.563683677895041e-05, + "loss": 0.2094, + "step": 5911, + "teacher_loss": 0.18663941323757172 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.3748759329319, + "learning_rate": 2.564117391932919e-05, + "loss": 0.1788, + "step": 5912, + "teacher_loss": 0.15697060525417328 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.3228701949119568, + "learning_rate": 2.5645511059707966e-05, + "loss": 0.2886, + "step": 5913, + "teacher_loss": 0.2848265469074249 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.6506280899047852, + "learning_rate": 2.5649848200086744e-05, + "loss": 0.4176, + "step": 5914, + "teacher_loss": 0.3917348086833954 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.456601083278656, + "learning_rate": 2.565418534046552e-05, + "loss": 0.2415, + "step": 5915, + "teacher_loss": 0.21759629249572754 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.21757668256759644, + "learning_rate": 2.56585224808443e-05, + "loss": 0.2023, + "step": 5916, + "teacher_loss": 0.2006537914276123 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.48809731006622314, + "learning_rate": 2.5662859621223073e-05, + "loss": 0.3167, + "step": 5917, + "teacher_loss": 0.29768240451812744 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.5608499050140381, + "learning_rate": 2.566719676160185e-05, + "loss": 0.2747, + "step": 5918, + "teacher_loss": 0.24287322163581848 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.3928006887435913, + "learning_rate": 2.567153390198063e-05, + "loss": 0.2987, + "step": 5919, + "teacher_loss": 0.28828299045562744 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.32989317178726196, + "learning_rate": 2.5675871042359406e-05, + "loss": 0.2181, + "step": 5920, + "teacher_loss": 0.20568659901618958 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.6060162782669067, + "learning_rate": 2.568020818273818e-05, + "loss": 0.3946, + "step": 5921, + "teacher_loss": 0.37109583616256714 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.5648013353347778, + "learning_rate": 2.5684545323116958e-05, + "loss": 0.2508, + "step": 5922, + "teacher_loss": 0.21590884029865265 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.468291699886322, + "learning_rate": 2.5688882463495736e-05, + "loss": 0.2162, + "step": 5923, + "teacher_loss": 0.1881471574306488 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 1.1098542213439941, + "learning_rate": 2.5693219603874513e-05, + "loss": 0.4604, + "step": 5924, + "teacher_loss": 0.3882291913032532 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.2893470227718353, + "learning_rate": 2.569755674425329e-05, + "loss": 0.2013, + "step": 5925, + "teacher_loss": 0.19151625037193298 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.6373051404953003, + "learning_rate": 2.5701893884632065e-05, + "loss": 0.265, + "step": 5926, + "teacher_loss": 0.22358834743499756 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.25862058997154236, + "learning_rate": 2.5706231025010843e-05, + "loss": 0.2437, + "step": 5927, + "teacher_loss": 0.24203769862651825 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.33675605058670044, + "learning_rate": 2.571056816538962e-05, + "loss": 0.1645, + "step": 5928, + "teacher_loss": 0.1453983187675476 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.4254949390888214, + "learning_rate": 2.5714905305768398e-05, + "loss": 0.3028, + "step": 5929, + "teacher_loss": 0.2891344130039215 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.9150857925415039, + "learning_rate": 2.5719242446147176e-05, + "loss": 0.2211, + "step": 5930, + "teacher_loss": 0.14401228725910187 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.4399152994155884, + "learning_rate": 2.5723579586525953e-05, + "loss": 0.3819, + "step": 5931, + "teacher_loss": 0.3754241168498993 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.6309801340103149, + "learning_rate": 2.5727916726904727e-05, + "loss": 0.2407, + "step": 5932, + "teacher_loss": 0.19732235372066498 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.7707975506782532, + "learning_rate": 2.5732253867283505e-05, + "loss": 0.6893, + "step": 5933, + "teacher_loss": 0.6802951097488403 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.29874110221862793, + "learning_rate": 2.5736591007662283e-05, + "loss": 0.2685, + "step": 5934, + "teacher_loss": 0.2651068866252899 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.8050134778022766, + "learning_rate": 2.5740928148041057e-05, + "loss": 0.327, + "step": 5935, + "teacher_loss": 0.2739187777042389 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.4073670506477356, + "learning_rate": 2.5745265288419835e-05, + "loss": 0.2497, + "step": 5936, + "teacher_loss": 0.23214513063430786 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.38365501165390015, + "learning_rate": 2.5749602428798612e-05, + "loss": 0.1825, + "step": 5937, + "teacher_loss": 0.16010083258152008 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.303866982460022, + "learning_rate": 2.575393956917739e-05, + "loss": 0.2223, + "step": 5938, + "teacher_loss": 0.21328914165496826 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.2488536536693573, + "learning_rate": 2.5758276709556167e-05, + "loss": 0.1934, + "step": 5939, + "teacher_loss": 0.18718531727790833 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.2682528793811798, + "learning_rate": 2.5762613849934945e-05, + "loss": 0.1822, + "step": 5940, + "teacher_loss": 0.17265480756759644 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.22913682460784912, + "learning_rate": 2.5766950990313723e-05, + "loss": 0.2926, + "step": 5941, + "teacher_loss": 0.29960906505584717 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.7226784229278564, + "learning_rate": 2.57712881306925e-05, + "loss": 0.3916, + "step": 5942, + "teacher_loss": 0.35483598709106445 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.6695114374160767, + "learning_rate": 2.5775625271071274e-05, + "loss": 0.2975, + "step": 5943, + "teacher_loss": 0.25612255930900574 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.5774508714675903, + "learning_rate": 2.577996241145005e-05, + "loss": 0.2427, + "step": 5944, + "teacher_loss": 0.20555315911769867 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.36685413122177124, + "learning_rate": 2.5784299551828826e-05, + "loss": 0.2987, + "step": 5945, + "teacher_loss": 0.29113471508026123 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.8600008487701416, + "learning_rate": 2.5788636692207604e-05, + "loss": 0.7383, + "step": 5946, + "teacher_loss": 0.7247945666313171 + }, + { + "compression_loss": 0.0, + "epoch": 1.07, + "label_loss": 0.6512706279754639, + "learning_rate": 2.579297383258638e-05, + "loss": 0.3005, + "step": 5947, + "teacher_loss": 0.2615642845630646 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.38632798194885254, + "learning_rate": 2.579731097296516e-05, + "loss": 0.186, + "step": 5948, + "teacher_loss": 0.16370287537574768 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.18378019332885742, + "learning_rate": 2.5801648113343937e-05, + "loss": 0.2362, + "step": 5949, + "teacher_loss": 0.241979718208313 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.2505667209625244, + "learning_rate": 2.5805985253722714e-05, + "loss": 0.227, + "step": 5950, + "teacher_loss": 0.22441637516021729 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.5794561505317688, + "learning_rate": 2.5810322394101492e-05, + "loss": 0.268, + "step": 5951, + "teacher_loss": 0.23337849974632263 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.21044307947158813, + "learning_rate": 2.581465953448027e-05, + "loss": 0.261, + "step": 5952, + "teacher_loss": 0.2665625512599945 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.2843470275402069, + "learning_rate": 2.5818996674859044e-05, + "loss": 0.2034, + "step": 5953, + "teacher_loss": 0.19435307383537292 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.3444425165653229, + "learning_rate": 2.5823333815237818e-05, + "loss": 0.2487, + "step": 5954, + "teacher_loss": 0.23807649314403534 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.6940368413925171, + "learning_rate": 2.5827670955616596e-05, + "loss": 0.3408, + "step": 5955, + "teacher_loss": 0.30152422189712524 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.47406646609306335, + "learning_rate": 2.5832008095995373e-05, + "loss": 0.2891, + "step": 5956, + "teacher_loss": 0.2685551643371582 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.39342159032821655, + "learning_rate": 2.583634523637415e-05, + "loss": 0.2497, + "step": 5957, + "teacher_loss": 0.23367643356323242 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 1.0082088708877563, + "learning_rate": 2.584068237675293e-05, + "loss": 0.3077, + "step": 5958, + "teacher_loss": 0.2298777997493744 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.2065315544605255, + "learning_rate": 2.5845019517131706e-05, + "loss": 0.2376, + "step": 5959, + "teacher_loss": 0.24100585281848907 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.15253156423568726, + "learning_rate": 2.5849356657510484e-05, + "loss": 0.1968, + "step": 5960, + "teacher_loss": 0.20173221826553345 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.1254030168056488, + "learning_rate": 2.5853693797889258e-05, + "loss": 0.2073, + "step": 5961, + "teacher_loss": 0.21641525626182556 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.5679243206977844, + "learning_rate": 2.5858030938268036e-05, + "loss": 0.2454, + "step": 5962, + "teacher_loss": 0.2095583826303482 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.46123939752578735, + "learning_rate": 2.5862368078646813e-05, + "loss": 0.298, + "step": 5963, + "teacher_loss": 0.2798651158809662 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.28733736276626587, + "learning_rate": 2.5866705219025588e-05, + "loss": 0.2206, + "step": 5964, + "teacher_loss": 0.21321985125541687 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.3101140856742859, + "learning_rate": 2.5871042359404365e-05, + "loss": 0.163, + "step": 5965, + "teacher_loss": 0.14664681255817413 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.2252921462059021, + "learning_rate": 2.5875379499783143e-05, + "loss": 0.2115, + "step": 5966, + "teacher_loss": 0.2099440097808838 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.6086270809173584, + "learning_rate": 2.587971664016192e-05, + "loss": 0.1917, + "step": 5967, + "teacher_loss": 0.14541780948638916 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.3337209224700928, + "learning_rate": 2.5884053780540698e-05, + "loss": 0.345, + "step": 5968, + "teacher_loss": 0.34627565741539 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.5281961560249329, + "learning_rate": 2.5888390920919476e-05, + "loss": 0.2711, + "step": 5969, + "teacher_loss": 0.2424831986427307 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.6757916212081909, + "learning_rate": 2.589272806129825e-05, + "loss": 0.3229, + "step": 5970, + "teacher_loss": 0.2836615741252899 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.5589495301246643, + "learning_rate": 2.5897065201677028e-05, + "loss": 0.2566, + "step": 5971, + "teacher_loss": 0.2230542153120041 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 1.0035693645477295, + "learning_rate": 2.5901402342055805e-05, + "loss": 0.3863, + "step": 5972, + "teacher_loss": 0.31774044036865234 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.21249422430992126, + "learning_rate": 2.5905739482434583e-05, + "loss": 0.1916, + "step": 5973, + "teacher_loss": 0.1892242431640625 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.16979198157787323, + "learning_rate": 2.591007662281336e-05, + "loss": 0.1955, + "step": 5974, + "teacher_loss": 0.19830450415611267 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.39572158455848694, + "learning_rate": 2.5914413763192135e-05, + "loss": 0.3735, + "step": 5975, + "teacher_loss": 0.37099185585975647 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.5492358207702637, + "learning_rate": 2.5918750903570912e-05, + "loss": 0.2311, + "step": 5976, + "teacher_loss": 0.1957527995109558 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.3307785093784332, + "learning_rate": 2.592308804394969e-05, + "loss": 0.2133, + "step": 5977, + "teacher_loss": 0.20025604963302612 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.6876586675643921, + "learning_rate": 2.5927425184328468e-05, + "loss": 0.2919, + "step": 5978, + "teacher_loss": 0.2479683756828308 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.18187719583511353, + "learning_rate": 2.5931762324707242e-05, + "loss": 0.206, + "step": 5979, + "teacher_loss": 0.20870481431484222 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.4458603858947754, + "learning_rate": 2.593609946508602e-05, + "loss": 0.2449, + "step": 5980, + "teacher_loss": 0.2225208282470703 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.6239303946495056, + "learning_rate": 2.5940436605464797e-05, + "loss": 0.2862, + "step": 5981, + "teacher_loss": 0.24867065250873566 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.24168676137924194, + "learning_rate": 2.5944773745843575e-05, + "loss": 0.1827, + "step": 5982, + "teacher_loss": 0.17615142464637756 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.5222713947296143, + "learning_rate": 2.5949110886222352e-05, + "loss": 0.2483, + "step": 5983, + "teacher_loss": 0.2178906500339508 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.1576201617717743, + "learning_rate": 2.595344802660113e-05, + "loss": 0.2353, + "step": 5984, + "teacher_loss": 0.2439640462398529 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.666489839553833, + "learning_rate": 2.5957785166979908e-05, + "loss": 0.3303, + "step": 5985, + "teacher_loss": 0.29294466972351074 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.35782110691070557, + "learning_rate": 2.5962122307358682e-05, + "loss": 0.2485, + "step": 5986, + "teacher_loss": 0.23636741936206818 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.3073885440826416, + "learning_rate": 2.596645944773746e-05, + "loss": 0.3342, + "step": 5987, + "teacher_loss": 0.3371885120868683 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.5758130550384521, + "learning_rate": 2.5970796588116234e-05, + "loss": 0.3179, + "step": 5988, + "teacher_loss": 0.2892420291900635 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.4360622763633728, + "learning_rate": 2.597513372849501e-05, + "loss": 0.2939, + "step": 5989, + "teacher_loss": 0.27813878655433655 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.47281211614608765, + "learning_rate": 2.597947086887379e-05, + "loss": 0.2242, + "step": 5990, + "teacher_loss": 0.19659289717674255 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.6105009317398071, + "learning_rate": 2.5983808009252566e-05, + "loss": 0.2288, + "step": 5991, + "teacher_loss": 0.18637344241142273 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.45067843794822693, + "learning_rate": 2.5988145149631344e-05, + "loss": 0.2379, + "step": 5992, + "teacher_loss": 0.2142692506313324 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.32397690415382385, + "learning_rate": 2.5992482290010122e-05, + "loss": 0.2699, + "step": 5993, + "teacher_loss": 0.26385828852653503 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.9340322613716125, + "learning_rate": 2.59968194303889e-05, + "loss": 0.3418, + "step": 5994, + "teacher_loss": 0.276030033826828 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.14384032785892487, + "learning_rate": 2.6001156570767677e-05, + "loss": 0.1514, + "step": 5995, + "teacher_loss": 0.1522909551858902 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.4057866632938385, + "learning_rate": 2.6005493711146455e-05, + "loss": 0.2251, + "step": 5996, + "teacher_loss": 0.20502130687236786 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.3707001805305481, + "learning_rate": 2.6009830851525225e-05, + "loss": 0.231, + "step": 5997, + "teacher_loss": 0.21544378995895386 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.3754444718360901, + "learning_rate": 2.6014167991904003e-05, + "loss": 0.2654, + "step": 5998, + "teacher_loss": 0.2531641721725464 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.6523469686508179, + "learning_rate": 2.601850513228278e-05, + "loss": 0.3695, + "step": 5999, + "teacher_loss": 0.3380582928657532 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.33952778577804565, + "learning_rate": 2.6022842272661558e-05, + "loss": 0.2098, + "step": 6000, + "teacher_loss": 0.19543160498142242 + }, + { + "epoch": 1.08, + "eval_exact_match": 79.5364238410596, + "eval_f1": 87.05001005077841, + "step": 6000 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.5937745571136475, + "learning_rate": 2.6027179413040336e-05, + "loss": 0.3033, + "step": 6001, + "teacher_loss": 0.2710226774215698 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.299640417098999, + "learning_rate": 2.6031516553419114e-05, + "loss": 0.2471, + "step": 6002, + "teacher_loss": 0.24126553535461426 + }, + { + "compression_loss": 0.0, + "epoch": 1.08, + "label_loss": 0.44425278902053833, + "learning_rate": 2.603585369379789e-05, + "loss": 0.2404, + "step": 6003, + "teacher_loss": 0.21772953867912292 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.4244877099990845, + "learning_rate": 2.604019083417667e-05, + "loss": 0.2056, + "step": 6004, + "teacher_loss": 0.18127241730690002 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.19513829052448273, + "learning_rate": 2.6044527974555446e-05, + "loss": 0.2713, + "step": 6005, + "teacher_loss": 0.27981239557266235 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.40419185161590576, + "learning_rate": 2.604886511493422e-05, + "loss": 0.3355, + "step": 6006, + "teacher_loss": 0.3278812766075134 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.11089880764484406, + "learning_rate": 2.6053202255312998e-05, + "loss": 0.1926, + "step": 6007, + "teacher_loss": 0.20165523886680603 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.7942944169044495, + "learning_rate": 2.6057539395691772e-05, + "loss": 0.2654, + "step": 6008, + "teacher_loss": 0.20668178796768188 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.19885137677192688, + "learning_rate": 2.606187653607055e-05, + "loss": 0.2044, + "step": 6009, + "teacher_loss": 0.20500102639198303 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.32747045159339905, + "learning_rate": 2.6066213676449328e-05, + "loss": 0.2267, + "step": 6010, + "teacher_loss": 0.2154829502105713 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.2849360704421997, + "learning_rate": 2.6070550816828105e-05, + "loss": 0.1988, + "step": 6011, + "teacher_loss": 0.1891832947731018 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.35029563307762146, + "learning_rate": 2.6074887957206883e-05, + "loss": 0.2398, + "step": 6012, + "teacher_loss": 0.22751376032829285 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.8549168109893799, + "learning_rate": 2.607922509758566e-05, + "loss": 0.371, + "step": 6013, + "teacher_loss": 0.31726354360580444 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.5811904668807983, + "learning_rate": 2.6083562237964435e-05, + "loss": 0.3952, + "step": 6014, + "teacher_loss": 0.37448573112487793 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.20279422402381897, + "learning_rate": 2.6087899378343212e-05, + "loss": 0.1965, + "step": 6015, + "teacher_loss": 0.1957501471042633 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.27620694041252136, + "learning_rate": 2.609223651872199e-05, + "loss": 0.1722, + "step": 6016, + "teacher_loss": 0.16064776480197906 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.29156967997550964, + "learning_rate": 2.6096573659100768e-05, + "loss": 0.1766, + "step": 6017, + "teacher_loss": 0.1637905389070511 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.3402637839317322, + "learning_rate": 2.6100910799479545e-05, + "loss": 0.1432, + "step": 6018, + "teacher_loss": 0.12128326296806335 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.3765139579772949, + "learning_rate": 2.610524793985832e-05, + "loss": 0.2826, + "step": 6019, + "teacher_loss": 0.27220267057418823 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.37916266918182373, + "learning_rate": 2.6109585080237097e-05, + "loss": 0.2153, + "step": 6020, + "teacher_loss": 0.19704505801200867 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.12241204082965851, + "learning_rate": 2.6113922220615875e-05, + "loss": 0.1483, + "step": 6021, + "teacher_loss": 0.15118181705474854 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.29885610938072205, + "learning_rate": 2.6118259360994652e-05, + "loss": 0.2191, + "step": 6022, + "teacher_loss": 0.21021530032157898 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.5026082396507263, + "learning_rate": 2.6122596501373427e-05, + "loss": 0.2831, + "step": 6023, + "teacher_loss": 0.2587442994117737 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.9825842380523682, + "learning_rate": 2.6126933641752204e-05, + "loss": 0.3187, + "step": 6024, + "teacher_loss": 0.2449318766593933 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.5616557002067566, + "learning_rate": 2.6131270782130982e-05, + "loss": 0.2479, + "step": 6025, + "teacher_loss": 0.2130769044160843 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.21342787146568298, + "learning_rate": 2.613560792250976e-05, + "loss": 0.2465, + "step": 6026, + "teacher_loss": 0.25022411346435547 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.7556260824203491, + "learning_rate": 2.6139945062888537e-05, + "loss": 0.2636, + "step": 6027, + "teacher_loss": 0.2089563012123108 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.3174939751625061, + "learning_rate": 2.6144282203267315e-05, + "loss": 0.3547, + "step": 6028, + "teacher_loss": 0.35886937379837036 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.20685821771621704, + "learning_rate": 2.6148619343646092e-05, + "loss": 0.1798, + "step": 6029, + "teacher_loss": 0.1768152415752411 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.7881063222885132, + "learning_rate": 2.6152956484024867e-05, + "loss": 0.3484, + "step": 6030, + "teacher_loss": 0.29956334829330444 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.21901699900627136, + "learning_rate": 2.6157293624403644e-05, + "loss": 0.2243, + "step": 6031, + "teacher_loss": 0.224918931722641 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.3141971230506897, + "learning_rate": 2.616163076478242e-05, + "loss": 0.2688, + "step": 6032, + "teacher_loss": 0.26375895738601685 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.25679054856300354, + "learning_rate": 2.6165967905161196e-05, + "loss": 0.1841, + "step": 6033, + "teacher_loss": 0.17600971460342407 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.34429165720939636, + "learning_rate": 2.6170305045539974e-05, + "loss": 0.2677, + "step": 6034, + "teacher_loss": 0.25921428203582764 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.4749927818775177, + "learning_rate": 2.617464218591875e-05, + "loss": 0.2446, + "step": 6035, + "teacher_loss": 0.21900954842567444 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.26815804839134216, + "learning_rate": 2.617897932629753e-05, + "loss": 0.1884, + "step": 6036, + "teacher_loss": 0.17958931624889374 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.6675317287445068, + "learning_rate": 2.6183316466676307e-05, + "loss": 0.2503, + "step": 6037, + "teacher_loss": 0.20397210121154785 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.3510726988315582, + "learning_rate": 2.6187653607055084e-05, + "loss": 0.3506, + "step": 6038, + "teacher_loss": 0.3505968451499939 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.6238422393798828, + "learning_rate": 2.6191990747433862e-05, + "loss": 0.2448, + "step": 6039, + "teacher_loss": 0.20266927778720856 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.3425091803073883, + "learning_rate": 2.619632788781264e-05, + "loss": 0.3397, + "step": 6040, + "teacher_loss": 0.3393399119377136 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.6078685522079468, + "learning_rate": 2.620066502819141e-05, + "loss": 0.3867, + "step": 6041, + "teacher_loss": 0.362124502658844 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.15485689043998718, + "learning_rate": 2.6205002168570188e-05, + "loss": 0.1364, + "step": 6042, + "teacher_loss": 0.1343672275543213 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.33681994676589966, + "learning_rate": 2.6209339308948966e-05, + "loss": 0.2945, + "step": 6043, + "teacher_loss": 0.2898402512073517 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.6960360407829285, + "learning_rate": 2.6213676449327743e-05, + "loss": 0.2795, + "step": 6044, + "teacher_loss": 0.23319706320762634 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.4189578890800476, + "learning_rate": 2.621801358970652e-05, + "loss": 0.2388, + "step": 6045, + "teacher_loss": 0.21874213218688965 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.3852342367172241, + "learning_rate": 2.62223507300853e-05, + "loss": 0.2275, + "step": 6046, + "teacher_loss": 0.20999422669410706 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.4208112955093384, + "learning_rate": 2.6226687870464076e-05, + "loss": 0.2763, + "step": 6047, + "teacher_loss": 0.26026690006256104 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 1.6586482524871826, + "learning_rate": 2.6231025010842854e-05, + "loss": 0.3192, + "step": 6048, + "teacher_loss": 0.17038945853710175 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.6716893911361694, + "learning_rate": 2.623536215122163e-05, + "loss": 0.3835, + "step": 6049, + "teacher_loss": 0.3514992594718933 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.2577231824398041, + "learning_rate": 2.6239699291600406e-05, + "loss": 0.1738, + "step": 6050, + "teacher_loss": 0.1644240915775299 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.6482865810394287, + "learning_rate": 2.6244036431979183e-05, + "loss": 0.3334, + "step": 6051, + "teacher_loss": 0.2984405755996704 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.6949761509895325, + "learning_rate": 2.6248373572357957e-05, + "loss": 0.2554, + "step": 6052, + "teacher_loss": 0.20660439133644104 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.4513072967529297, + "learning_rate": 2.6252710712736735e-05, + "loss": 0.2422, + "step": 6053, + "teacher_loss": 0.21891817450523376 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.22213289141654968, + "learning_rate": 2.6257047853115513e-05, + "loss": 0.1995, + "step": 6054, + "teacher_loss": 0.19702666997909546 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.702265739440918, + "learning_rate": 2.626138499349429e-05, + "loss": 0.2779, + "step": 6055, + "teacher_loss": 0.2307310849428177 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.5064534544944763, + "learning_rate": 2.6265722133873068e-05, + "loss": 0.3491, + "step": 6056, + "teacher_loss": 0.33158668875694275 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.2842394709587097, + "learning_rate": 2.6270059274251845e-05, + "loss": 0.1983, + "step": 6057, + "teacher_loss": 0.18871907889842987 + }, + { + "compression_loss": 0.0, + "epoch": 1.09, + "label_loss": 0.4116808772087097, + "learning_rate": 2.627439641463062e-05, + "loss": 0.1906, + "step": 6058, + "teacher_loss": 0.1659819781780243 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.21029409766197205, + "learning_rate": 2.6278733555009397e-05, + "loss": 0.1923, + "step": 6059, + "teacher_loss": 0.1902952492237091 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.46655088663101196, + "learning_rate": 2.6283070695388175e-05, + "loss": 0.2749, + "step": 6060, + "teacher_loss": 0.2536253333091736 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.9944543838500977, + "learning_rate": 2.6287407835766953e-05, + "loss": 0.2972, + "step": 6061, + "teacher_loss": 0.2197316586971283 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.4365714192390442, + "learning_rate": 2.6291744976145727e-05, + "loss": 0.2747, + "step": 6062, + "teacher_loss": 0.25671032071113586 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.5145626664161682, + "learning_rate": 2.6296082116524504e-05, + "loss": 0.2659, + "step": 6063, + "teacher_loss": 0.23826278746128082 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.6378750205039978, + "learning_rate": 2.6300419256903282e-05, + "loss": 0.3121, + "step": 6064, + "teacher_loss": 0.2758827209472656 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.6705365180969238, + "learning_rate": 2.630475639728206e-05, + "loss": 0.3703, + "step": 6065, + "teacher_loss": 0.33698129653930664 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.3675013482570648, + "learning_rate": 2.6309093537660837e-05, + "loss": 0.24, + "step": 6066, + "teacher_loss": 0.22585347294807434 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.29251083731651306, + "learning_rate": 2.631343067803961e-05, + "loss": 0.3122, + "step": 6067, + "teacher_loss": 0.3143903315067291 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.3683716654777527, + "learning_rate": 2.631776781841839e-05, + "loss": 0.2358, + "step": 6068, + "teacher_loss": 0.22107170522212982 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.6156793832778931, + "learning_rate": 2.6322104958797167e-05, + "loss": 0.2279, + "step": 6069, + "teacher_loss": 0.1848054975271225 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.3482230305671692, + "learning_rate": 2.6326442099175944e-05, + "loss": 0.2395, + "step": 6070, + "teacher_loss": 0.22738529741764069 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.3504316806793213, + "learning_rate": 2.6330779239554722e-05, + "loss": 0.2083, + "step": 6071, + "teacher_loss": 0.19246095418930054 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.12203864753246307, + "learning_rate": 2.63351163799335e-05, + "loss": 0.2279, + "step": 6072, + "teacher_loss": 0.23962397873401642 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.46075522899627686, + "learning_rate": 2.6339453520312274e-05, + "loss": 0.2758, + "step": 6073, + "teacher_loss": 0.2552947700023651 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.7439577579498291, + "learning_rate": 2.634379066069105e-05, + "loss": 0.3458, + "step": 6074, + "teacher_loss": 0.30151820182800293 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.3551526665687561, + "learning_rate": 2.634812780106983e-05, + "loss": 0.254, + "step": 6075, + "teacher_loss": 0.24275073409080505 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.9436540603637695, + "learning_rate": 2.6352464941448603e-05, + "loss": 0.3511, + "step": 6076, + "teacher_loss": 0.2852747440338135 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.19243931770324707, + "learning_rate": 2.635680208182738e-05, + "loss": 0.2469, + "step": 6077, + "teacher_loss": 0.2529582977294922 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.47655391693115234, + "learning_rate": 2.636113922220616e-05, + "loss": 0.178, + "step": 6078, + "teacher_loss": 0.14487934112548828 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.7089213728904724, + "learning_rate": 2.6365476362584936e-05, + "loss": 0.3093, + "step": 6079, + "teacher_loss": 0.2648907005786896 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.2391085922718048, + "learning_rate": 2.6369813502963714e-05, + "loss": 0.1817, + "step": 6080, + "teacher_loss": 0.1752692013978958 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.7359232902526855, + "learning_rate": 2.637415064334249e-05, + "loss": 0.2313, + "step": 6081, + "teacher_loss": 0.17523014545440674 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.5613267421722412, + "learning_rate": 2.637848778372127e-05, + "loss": 0.232, + "step": 6082, + "teacher_loss": 0.19544321298599243 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.4572908282279968, + "learning_rate": 2.6382824924100047e-05, + "loss": 0.2261, + "step": 6083, + "teacher_loss": 0.20043939352035522 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.45113933086395264, + "learning_rate": 2.638716206447882e-05, + "loss": 0.432, + "step": 6084, + "teacher_loss": 0.4298262894153595 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.5387110114097595, + "learning_rate": 2.6391499204857595e-05, + "loss": 0.2139, + "step": 6085, + "teacher_loss": 0.17777490615844727 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.5614681243896484, + "learning_rate": 2.6395836345236373e-05, + "loss": 0.2342, + "step": 6086, + "teacher_loss": 0.19789093732833862 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.40348750352859497, + "learning_rate": 2.640017348561515e-05, + "loss": 0.2325, + "step": 6087, + "teacher_loss": 0.21351078152656555 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.2748577296733856, + "learning_rate": 2.6404510625993928e-05, + "loss": 0.1867, + "step": 6088, + "teacher_loss": 0.17685964703559875 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.2772558033466339, + "learning_rate": 2.6408847766372706e-05, + "loss": 0.2171, + "step": 6089, + "teacher_loss": 0.21038465201854706 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.43373793363571167, + "learning_rate": 2.6413184906751483e-05, + "loss": 0.2217, + "step": 6090, + "teacher_loss": 0.1981760859489441 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.5810621976852417, + "learning_rate": 2.641752204713026e-05, + "loss": 0.2851, + "step": 6091, + "teacher_loss": 0.2522633671760559 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.25044485926628113, + "learning_rate": 2.642185918750904e-05, + "loss": 0.2063, + "step": 6092, + "teacher_loss": 0.20138826966285706 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.32210400700569153, + "learning_rate": 2.6426196327887816e-05, + "loss": 0.2219, + "step": 6093, + "teacher_loss": 0.2108059823513031 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.42475032806396484, + "learning_rate": 2.643053346826659e-05, + "loss": 0.2774, + "step": 6094, + "teacher_loss": 0.26100534200668335 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.3819844722747803, + "learning_rate": 2.6434870608645365e-05, + "loss": 0.2082, + "step": 6095, + "teacher_loss": 0.18890786170959473 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.40380099415779114, + "learning_rate": 2.6439207749024142e-05, + "loss": 0.3836, + "step": 6096, + "teacher_loss": 0.3813990652561188 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.8675791025161743, + "learning_rate": 2.644354488940292e-05, + "loss": 0.5083, + "step": 6097, + "teacher_loss": 0.46836966276168823 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.5458240509033203, + "learning_rate": 2.6447882029781697e-05, + "loss": 0.2763, + "step": 6098, + "teacher_loss": 0.24630212783813477 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.7106964588165283, + "learning_rate": 2.6452219170160475e-05, + "loss": 0.3054, + "step": 6099, + "teacher_loss": 0.2604144811630249 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 1.066803216934204, + "learning_rate": 2.6456556310539253e-05, + "loss": 0.3294, + "step": 6100, + "teacher_loss": 0.24748049676418304 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.5097984671592712, + "learning_rate": 2.646089345091803e-05, + "loss": 0.2988, + "step": 6101, + "teacher_loss": 0.275393009185791 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.5041478872299194, + "learning_rate": 2.6465230591296805e-05, + "loss": 0.2165, + "step": 6102, + "teacher_loss": 0.1845349371433258 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.6240526437759399, + "learning_rate": 2.6469567731675582e-05, + "loss": 0.2784, + "step": 6103, + "teacher_loss": 0.23996350169181824 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.2629654109477997, + "learning_rate": 2.647390487205436e-05, + "loss": 0.2696, + "step": 6104, + "teacher_loss": 0.27039217948913574 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.2751466631889343, + "learning_rate": 2.6478242012433137e-05, + "loss": 0.1959, + "step": 6105, + "teacher_loss": 0.1871006041765213 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.19068299233913422, + "learning_rate": 2.648257915281191e-05, + "loss": 0.139, + "step": 6106, + "teacher_loss": 0.13321253657341003 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.42008715867996216, + "learning_rate": 2.648691629319069e-05, + "loss": 0.1957, + "step": 6107, + "teacher_loss": 0.1707797646522522 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.18562796711921692, + "learning_rate": 2.6491253433569467e-05, + "loss": 0.208, + "step": 6108, + "teacher_loss": 0.21046766638755798 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.49823611974716187, + "learning_rate": 2.6495590573948245e-05, + "loss": 0.4236, + "step": 6109, + "teacher_loss": 0.41528481245040894 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.22772350907325745, + "learning_rate": 2.6499927714327022e-05, + "loss": 0.2634, + "step": 6110, + "teacher_loss": 0.267348974943161 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.23748721182346344, + "learning_rate": 2.6504264854705796e-05, + "loss": 0.2111, + "step": 6111, + "teacher_loss": 0.20818842947483063 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.3436656594276428, + "learning_rate": 2.6508601995084574e-05, + "loss": 0.2044, + "step": 6112, + "teacher_loss": 0.1889735460281372 + }, + { + "compression_loss": 0.0, + "epoch": 1.1, + "label_loss": 0.5322515368461609, + "learning_rate": 2.651293913546335e-05, + "loss": 0.2163, + "step": 6113, + "teacher_loss": 0.18124409019947052 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.6623352766036987, + "learning_rate": 2.651727627584213e-05, + "loss": 0.444, + "step": 6114, + "teacher_loss": 0.4197794795036316 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.24485337734222412, + "learning_rate": 2.6521613416220907e-05, + "loss": 0.1683, + "step": 6115, + "teacher_loss": 0.1597769856452942 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.515722393989563, + "learning_rate": 2.6525950556599685e-05, + "loss": 0.2977, + "step": 6116, + "teacher_loss": 0.2734895944595337 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.2177710235118866, + "learning_rate": 2.653028769697846e-05, + "loss": 0.2703, + "step": 6117, + "teacher_loss": 0.27613985538482666 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.2102414071559906, + "learning_rate": 2.6534624837357236e-05, + "loss": 0.1416, + "step": 6118, + "teacher_loss": 0.13402248919010162 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.3124125897884369, + "learning_rate": 2.6538961977736014e-05, + "loss": 0.1724, + "step": 6119, + "teacher_loss": 0.15681597590446472 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.5765262842178345, + "learning_rate": 2.6543299118114788e-05, + "loss": 0.2686, + "step": 6120, + "teacher_loss": 0.2343907356262207 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.35706061124801636, + "learning_rate": 2.6547636258493566e-05, + "loss": 0.3485, + "step": 6121, + "teacher_loss": 0.3475823998451233 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.6210277080535889, + "learning_rate": 2.6551973398872343e-05, + "loss": 0.3031, + "step": 6122, + "teacher_loss": 0.26776760816574097 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.17806726694107056, + "learning_rate": 2.655631053925112e-05, + "loss": 0.303, + "step": 6123, + "teacher_loss": 0.31689774990081787 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.37102100253105164, + "learning_rate": 2.65606476796299e-05, + "loss": 0.2713, + "step": 6124, + "teacher_loss": 0.26021063327789307 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.567483127117157, + "learning_rate": 2.6564984820008676e-05, + "loss": 0.2305, + "step": 6125, + "teacher_loss": 0.1930255889892578 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.6561061143875122, + "learning_rate": 2.6569321960387454e-05, + "loss": 0.348, + "step": 6126, + "teacher_loss": 0.31376904249191284 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.048316821455955505, + "learning_rate": 2.657365910076623e-05, + "loss": 0.1274, + "step": 6127, + "teacher_loss": 0.13622258603572845 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.700487494468689, + "learning_rate": 2.6577996241145006e-05, + "loss": 0.5277, + "step": 6128, + "teacher_loss": 0.5085031986236572 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.8071776628494263, + "learning_rate": 2.658233338152378e-05, + "loss": 0.3102, + "step": 6129, + "teacher_loss": 0.25495368242263794 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.7379237413406372, + "learning_rate": 2.6586670521902558e-05, + "loss": 0.2368, + "step": 6130, + "teacher_loss": 0.18112680315971375 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.11972502619028091, + "learning_rate": 2.6591007662281335e-05, + "loss": 0.1386, + "step": 6131, + "teacher_loss": 0.14070644974708557 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.245040625333786, + "learning_rate": 2.6595344802660113e-05, + "loss": 0.2793, + "step": 6132, + "teacher_loss": 0.28315672278404236 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.8436753749847412, + "learning_rate": 2.659968194303889e-05, + "loss": 0.3104, + "step": 6133, + "teacher_loss": 0.251151442527771 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.5027743577957153, + "learning_rate": 2.6604019083417668e-05, + "loss": 0.2507, + "step": 6134, + "teacher_loss": 0.22271570563316345 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.49747928977012634, + "learning_rate": 2.6608356223796446e-05, + "loss": 0.4045, + "step": 6135, + "teacher_loss": 0.3942154049873352 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.635773777961731, + "learning_rate": 2.6612693364175223e-05, + "loss": 0.2378, + "step": 6136, + "teacher_loss": 0.19354480504989624 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.40331166982650757, + "learning_rate": 2.6617030504554e-05, + "loss": 0.2102, + "step": 6137, + "teacher_loss": 0.18878497183322906 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.21453385055065155, + "learning_rate": 2.6621367644932775e-05, + "loss": 0.1995, + "step": 6138, + "teacher_loss": 0.19777438044548035 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.8680122494697571, + "learning_rate": 2.662570478531155e-05, + "loss": 0.3962, + "step": 6139, + "teacher_loss": 0.34378787875175476 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.48129940032958984, + "learning_rate": 2.6630041925690327e-05, + "loss": 0.2093, + "step": 6140, + "teacher_loss": 0.1790616810321808 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.736588716506958, + "learning_rate": 2.6634379066069105e-05, + "loss": 0.3939, + "step": 6141, + "teacher_loss": 0.35582658648490906 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.2747771143913269, + "learning_rate": 2.6638716206447882e-05, + "loss": 0.2271, + "step": 6142, + "teacher_loss": 0.22183355689048767 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.4167356491088867, + "learning_rate": 2.664305334682666e-05, + "loss": 0.2534, + "step": 6143, + "teacher_loss": 0.23520107567310333 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.2032022476196289, + "learning_rate": 2.6647390487205438e-05, + "loss": 0.2951, + "step": 6144, + "teacher_loss": 0.30530422925949097 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.7460829615592957, + "learning_rate": 2.6651727627584215e-05, + "loss": 0.3492, + "step": 6145, + "teacher_loss": 0.30509495735168457 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.3391437232494354, + "learning_rate": 2.665606476796299e-05, + "loss": 0.313, + "step": 6146, + "teacher_loss": 0.31009337306022644 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.3658826947212219, + "learning_rate": 2.6660401908341767e-05, + "loss": 0.1954, + "step": 6147, + "teacher_loss": 0.1764914095401764 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.6718495488166809, + "learning_rate": 2.6664739048720545e-05, + "loss": 0.3276, + "step": 6148, + "teacher_loss": 0.28934958577156067 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.2850288450717926, + "learning_rate": 2.6669076189099322e-05, + "loss": 0.3539, + "step": 6149, + "teacher_loss": 0.36151546239852905 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.30485427379608154, + "learning_rate": 2.6673413329478097e-05, + "loss": 0.1925, + "step": 6150, + "teacher_loss": 0.18000689148902893 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.43627816438674927, + "learning_rate": 2.6677750469856874e-05, + "loss": 0.2133, + "step": 6151, + "teacher_loss": 0.18855538964271545 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.4119974970817566, + "learning_rate": 2.6682087610235652e-05, + "loss": 0.1896, + "step": 6152, + "teacher_loss": 0.16485372185707092 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.3464997410774231, + "learning_rate": 2.668642475061443e-05, + "loss": 0.2839, + "step": 6153, + "teacher_loss": 0.2769213318824768 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.29194486141204834, + "learning_rate": 2.6690761890993207e-05, + "loss": 0.1984, + "step": 6154, + "teacher_loss": 0.1880495250225067 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.34926748275756836, + "learning_rate": 2.669509903137198e-05, + "loss": 0.2332, + "step": 6155, + "teacher_loss": 0.22029638290405273 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.30133694410324097, + "learning_rate": 2.669943617175076e-05, + "loss": 0.1693, + "step": 6156, + "teacher_loss": 0.15467047691345215 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.5656158924102783, + "learning_rate": 2.6703773312129537e-05, + "loss": 0.3686, + "step": 6157, + "teacher_loss": 0.3467395007610321 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.32877397537231445, + "learning_rate": 2.6708110452508314e-05, + "loss": 0.184, + "step": 6158, + "teacher_loss": 0.16793489456176758 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.724859356880188, + "learning_rate": 2.6712447592887092e-05, + "loss": 0.2803, + "step": 6159, + "teacher_loss": 0.23089691996574402 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.3149811029434204, + "learning_rate": 2.6716784733265866e-05, + "loss": 0.2919, + "step": 6160, + "teacher_loss": 0.2893643081188202 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.5111450552940369, + "learning_rate": 2.6721121873644644e-05, + "loss": 0.2102, + "step": 6161, + "teacher_loss": 0.17675435543060303 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.9811246991157532, + "learning_rate": 2.672545901402342e-05, + "loss": 0.2637, + "step": 6162, + "teacher_loss": 0.18399053812026978 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.6269147396087646, + "learning_rate": 2.67297961544022e-05, + "loss": 0.2869, + "step": 6163, + "teacher_loss": 0.24911335110664368 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.42970097064971924, + "learning_rate": 2.6734133294780973e-05, + "loss": 0.2521, + "step": 6164, + "teacher_loss": 0.23242150247097015 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.5245120525360107, + "learning_rate": 2.673847043515975e-05, + "loss": 0.2382, + "step": 6165, + "teacher_loss": 0.20643754303455353 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.45311814546585083, + "learning_rate": 2.674280757553853e-05, + "loss": 0.2968, + "step": 6166, + "teacher_loss": 0.2793947458267212 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.25458672642707825, + "learning_rate": 2.6747144715917306e-05, + "loss": 0.2347, + "step": 6167, + "teacher_loss": 0.23254278302192688 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.8695564270019531, + "learning_rate": 2.6751481856296084e-05, + "loss": 0.4433, + "step": 6168, + "teacher_loss": 0.39592310786247253 + }, + { + "compression_loss": 0.0, + "epoch": 1.11, + "label_loss": 0.3525991439819336, + "learning_rate": 2.675581899667486e-05, + "loss": 0.2139, + "step": 6169, + "teacher_loss": 0.19845744967460632 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.22229371964931488, + "learning_rate": 2.676015613705364e-05, + "loss": 0.2624, + "step": 6170, + "teacher_loss": 0.26684755086898804 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.3580290973186493, + "learning_rate": 2.6764493277432413e-05, + "loss": 0.2192, + "step": 6171, + "teacher_loss": 0.20379358530044556 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.5995192527770996, + "learning_rate": 2.676883041781119e-05, + "loss": 0.2555, + "step": 6172, + "teacher_loss": 0.2172919362783432 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.34453830122947693, + "learning_rate": 2.6773167558189965e-05, + "loss": 0.2841, + "step": 6173, + "teacher_loss": 0.2774102985858917 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.3946101665496826, + "learning_rate": 2.6777504698568743e-05, + "loss": 0.2388, + "step": 6174, + "teacher_loss": 0.22143924236297607 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.49136483669281006, + "learning_rate": 2.678184183894752e-05, + "loss": 0.235, + "step": 6175, + "teacher_loss": 0.2064749002456665 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.31167325377464294, + "learning_rate": 2.6786178979326298e-05, + "loss": 0.2133, + "step": 6176, + "teacher_loss": 0.20236200094223022 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.41216546297073364, + "learning_rate": 2.6790516119705075e-05, + "loss": 0.2172, + "step": 6177, + "teacher_loss": 0.19551342725753784 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.3572585880756378, + "learning_rate": 2.6794853260083853e-05, + "loss": 0.4886, + "step": 6178, + "teacher_loss": 0.5031484961509705 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.4012985825538635, + "learning_rate": 2.679919040046263e-05, + "loss": 0.2282, + "step": 6179, + "teacher_loss": 0.2089385837316513 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.3170543909072876, + "learning_rate": 2.6803527540841408e-05, + "loss": 0.3056, + "step": 6180, + "teacher_loss": 0.30435580015182495 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.4239477515220642, + "learning_rate": 2.6807864681220186e-05, + "loss": 0.1943, + "step": 6181, + "teacher_loss": 0.16879016160964966 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.3119276165962219, + "learning_rate": 2.6812201821598957e-05, + "loss": 0.2595, + "step": 6182, + "teacher_loss": 0.25371548533439636 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.26219457387924194, + "learning_rate": 2.6816538961977734e-05, + "loss": 0.2188, + "step": 6183, + "teacher_loss": 0.21398219466209412 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.21266594529151917, + "learning_rate": 2.6820876102356512e-05, + "loss": 0.2158, + "step": 6184, + "teacher_loss": 0.21617019176483154 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.4246112108230591, + "learning_rate": 2.682521324273529e-05, + "loss": 0.4111, + "step": 6185, + "teacher_loss": 0.40964359045028687 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.38567304611206055, + "learning_rate": 2.6829550383114067e-05, + "loss": 0.3568, + "step": 6186, + "teacher_loss": 0.3535376787185669 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.43796199560165405, + "learning_rate": 2.6833887523492845e-05, + "loss": 0.2584, + "step": 6187, + "teacher_loss": 0.23841264843940735 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.47674989700317383, + "learning_rate": 2.6838224663871622e-05, + "loss": 0.2596, + "step": 6188, + "teacher_loss": 0.23544053733348846 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.26218241453170776, + "learning_rate": 2.68425618042504e-05, + "loss": 0.3698, + "step": 6189, + "teacher_loss": 0.38178274035453796 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.3214132487773895, + "learning_rate": 2.6846898944629178e-05, + "loss": 0.2144, + "step": 6190, + "teacher_loss": 0.20247536897659302 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.17942091822624207, + "learning_rate": 2.6851236085007952e-05, + "loss": 0.2534, + "step": 6191, + "teacher_loss": 0.2615981698036194 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.3345814347267151, + "learning_rate": 2.685557322538673e-05, + "loss": 0.2987, + "step": 6192, + "teacher_loss": 0.294662743806839 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.7157434821128845, + "learning_rate": 2.6859910365765504e-05, + "loss": 0.2595, + "step": 6193, + "teacher_loss": 0.2088591307401657 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.3723817467689514, + "learning_rate": 2.686424750614428e-05, + "loss": 0.2833, + "step": 6194, + "teacher_loss": 0.273406445980072 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.3967821002006531, + "learning_rate": 2.686858464652306e-05, + "loss": 0.1766, + "step": 6195, + "teacher_loss": 0.15211889147758484 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.37578219175338745, + "learning_rate": 2.6872921786901837e-05, + "loss": 0.2036, + "step": 6196, + "teacher_loss": 0.18442532420158386 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.3875725567340851, + "learning_rate": 2.6877258927280614e-05, + "loss": 0.301, + "step": 6197, + "teacher_loss": 0.29136911034584045 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.31419432163238525, + "learning_rate": 2.6881596067659392e-05, + "loss": 0.3176, + "step": 6198, + "teacher_loss": 0.317990779876709 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.5231434106826782, + "learning_rate": 2.6885933208038166e-05, + "loss": 0.2533, + "step": 6199, + "teacher_loss": 0.22333839535713196 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.07056882977485657, + "learning_rate": 2.6890270348416944e-05, + "loss": 0.1496, + "step": 6200, + "teacher_loss": 0.15836943686008453 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.40262824296951294, + "learning_rate": 2.689460748879572e-05, + "loss": 0.2244, + "step": 6201, + "teacher_loss": 0.20458002388477325 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.22769679129123688, + "learning_rate": 2.68989446291745e-05, + "loss": 0.2256, + "step": 6202, + "teacher_loss": 0.2253933697938919 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.3718760013580322, + "learning_rate": 2.6903281769553277e-05, + "loss": 0.2892, + "step": 6203, + "teacher_loss": 0.2800065577030182 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.4534824788570404, + "learning_rate": 2.690761890993205e-05, + "loss": 0.2105, + "step": 6204, + "teacher_loss": 0.18346816301345825 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.5914174318313599, + "learning_rate": 2.691195605031083e-05, + "loss": 0.312, + "step": 6205, + "teacher_loss": 0.2809494137763977 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.21935322880744934, + "learning_rate": 2.6916293190689606e-05, + "loss": 0.3454, + "step": 6206, + "teacher_loss": 0.3594011068344116 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.38528817892074585, + "learning_rate": 2.6920630331068384e-05, + "loss": 0.2552, + "step": 6207, + "teacher_loss": 0.24073925614356995 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.6815232038497925, + "learning_rate": 2.6924967471447158e-05, + "loss": 0.494, + "step": 6208, + "teacher_loss": 0.4731563925743103 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.682334303855896, + "learning_rate": 2.6929304611825936e-05, + "loss": 0.8132, + "step": 6209, + "teacher_loss": 0.827795147895813 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.8604118227958679, + "learning_rate": 2.6933641752204713e-05, + "loss": 0.492, + "step": 6210, + "teacher_loss": 0.4510447680950165 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.6672353744506836, + "learning_rate": 2.693797889258349e-05, + "loss": 0.3776, + "step": 6211, + "teacher_loss": 0.34536808729171753 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.691842794418335, + "learning_rate": 2.694231603296227e-05, + "loss": 0.3237, + "step": 6212, + "teacher_loss": 0.2827497124671936 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.3167247772216797, + "learning_rate": 2.6946653173341046e-05, + "loss": 0.265, + "step": 6213, + "teacher_loss": 0.25926733016967773 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.6608999967575073, + "learning_rate": 2.6950990313719824e-05, + "loss": 0.4023, + "step": 6214, + "teacher_loss": 0.37359610199928284 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.29286032915115356, + "learning_rate": 2.6955327454098598e-05, + "loss": 0.2129, + "step": 6215, + "teacher_loss": 0.20399153232574463 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.44547098875045776, + "learning_rate": 2.6959664594477376e-05, + "loss": 0.2856, + "step": 6216, + "teacher_loss": 0.26780247688293457 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.4220615029335022, + "learning_rate": 2.696400173485615e-05, + "loss": 0.3394, + "step": 6217, + "teacher_loss": 0.3301962912082672 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.4473878741264343, + "learning_rate": 2.6968338875234927e-05, + "loss": 0.2497, + "step": 6218, + "teacher_loss": 0.2277584969997406 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.7074957489967346, + "learning_rate": 2.6972676015613705e-05, + "loss": 0.302, + "step": 6219, + "teacher_loss": 0.2569655179977417 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.5834007263183594, + "learning_rate": 2.6977013155992483e-05, + "loss": 0.2672, + "step": 6220, + "teacher_loss": 0.23203304409980774 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.6585375070571899, + "learning_rate": 2.698135029637126e-05, + "loss": 0.2291, + "step": 6221, + "teacher_loss": 0.18136140704154968 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.4529242515563965, + "learning_rate": 2.6985687436750038e-05, + "loss": 0.257, + "step": 6222, + "teacher_loss": 0.23521792888641357 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.5263648629188538, + "learning_rate": 2.6990024577128816e-05, + "loss": 0.2906, + "step": 6223, + "teacher_loss": 0.26438677310943604 + }, + { + "compression_loss": 0.0, + "epoch": 1.12, + "label_loss": 0.5613182783126831, + "learning_rate": 2.6994361717507593e-05, + "loss": 0.306, + "step": 6224, + "teacher_loss": 0.2776665687561035 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.35175371170043945, + "learning_rate": 2.699869885788637e-05, + "loss": 0.1934, + "step": 6225, + "teacher_loss": 0.1758594810962677 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.17961886525154114, + "learning_rate": 2.700303599826514e-05, + "loss": 0.1958, + "step": 6226, + "teacher_loss": 0.1976485252380371 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.5187227129936218, + "learning_rate": 2.700737313864392e-05, + "loss": 0.2682, + "step": 6227, + "teacher_loss": 0.24039678275585175 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.13387733697891235, + "learning_rate": 2.7011710279022697e-05, + "loss": 0.2003, + "step": 6228, + "teacher_loss": 0.20765471458435059 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.6254796385765076, + "learning_rate": 2.7016047419401474e-05, + "loss": 0.3468, + "step": 6229, + "teacher_loss": 0.3158687651157379 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.41606438159942627, + "learning_rate": 2.7020384559780252e-05, + "loss": 0.2887, + "step": 6230, + "teacher_loss": 0.27454304695129395 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.5500345230102539, + "learning_rate": 2.702472170015903e-05, + "loss": 0.2887, + "step": 6231, + "teacher_loss": 0.2596123218536377 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 1.114802360534668, + "learning_rate": 2.7029058840537807e-05, + "loss": 0.3248, + "step": 6232, + "teacher_loss": 0.23704317212104797 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.2791847884654999, + "learning_rate": 2.7033395980916585e-05, + "loss": 0.1817, + "step": 6233, + "teacher_loss": 0.1709081083536148 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.32286444306373596, + "learning_rate": 2.7037733121295363e-05, + "loss": 0.2264, + "step": 6234, + "teacher_loss": 0.21571099758148193 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.406974196434021, + "learning_rate": 2.7042070261674137e-05, + "loss": 0.254, + "step": 6235, + "teacher_loss": 0.23705804347991943 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.22894413769245148, + "learning_rate": 2.7046407402052914e-05, + "loss": 0.196, + "step": 6236, + "teacher_loss": 0.19238464534282684 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.8907871246337891, + "learning_rate": 2.705074454243169e-05, + "loss": 0.575, + "step": 6237, + "teacher_loss": 0.539874792098999 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.18466398119926453, + "learning_rate": 2.7055081682810466e-05, + "loss": 0.1928, + "step": 6238, + "teacher_loss": 0.1937258094549179 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.42910873889923096, + "learning_rate": 2.7059418823189244e-05, + "loss": 0.2672, + "step": 6239, + "teacher_loss": 0.2492278814315796 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.19823455810546875, + "learning_rate": 2.706375596356802e-05, + "loss": 0.2276, + "step": 6240, + "teacher_loss": 0.23089349269866943 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.2920618951320648, + "learning_rate": 2.70680931039468e-05, + "loss": 0.2358, + "step": 6241, + "teacher_loss": 0.22954139113426208 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.5309002995491028, + "learning_rate": 2.7072430244325577e-05, + "loss": 0.2497, + "step": 6242, + "teacher_loss": 0.2184501737356186 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.7090290188789368, + "learning_rate": 2.707676738470435e-05, + "loss": 0.4171, + "step": 6243, + "teacher_loss": 0.384652704000473 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.8452855348587036, + "learning_rate": 2.708110452508313e-05, + "loss": 0.323, + "step": 6244, + "teacher_loss": 0.2649414837360382 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.3568814992904663, + "learning_rate": 2.7085441665461906e-05, + "loss": 0.3271, + "step": 6245, + "teacher_loss": 0.323817640542984 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.41398942470550537, + "learning_rate": 2.7089778805840684e-05, + "loss": 0.2544, + "step": 6246, + "teacher_loss": 0.23665517568588257 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.4368191957473755, + "learning_rate": 2.709411594621946e-05, + "loss": 0.256, + "step": 6247, + "teacher_loss": 0.23593543469905853 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.452435702085495, + "learning_rate": 2.7098453086598236e-05, + "loss": 0.267, + "step": 6248, + "teacher_loss": 0.2463577389717102 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.35763415694236755, + "learning_rate": 2.7102790226977013e-05, + "loss": 0.3126, + "step": 6249, + "teacher_loss": 0.3075858950614929 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.32362717390060425, + "learning_rate": 2.710712736735579e-05, + "loss": 0.244, + "step": 6250, + "teacher_loss": 0.23513546586036682 + }, + { + "epoch": 1.13, + "eval_exact_match": 79.64049195837275, + "eval_f1": 87.22254960916146, + "step": 6250 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.830796480178833, + "learning_rate": 2.711146450773457e-05, + "loss": 0.2792, + "step": 6251, + "teacher_loss": 0.2179451882839203 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.13678866624832153, + "learning_rate": 2.7115801648113343e-05, + "loss": 0.2053, + "step": 6252, + "teacher_loss": 0.21286973357200623 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.20709894597530365, + "learning_rate": 2.712013878849212e-05, + "loss": 0.231, + "step": 6253, + "teacher_loss": 0.2336689829826355 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.18088707327842712, + "learning_rate": 2.7124475928870898e-05, + "loss": 0.1969, + "step": 6254, + "teacher_loss": 0.19870467483997345 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.43912485241889954, + "learning_rate": 2.7128813069249676e-05, + "loss": 0.3575, + "step": 6255, + "teacher_loss": 0.3484174311161041 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.2224666178226471, + "learning_rate": 2.7133150209628453e-05, + "loss": 0.2235, + "step": 6256, + "teacher_loss": 0.2236527055501938 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.4598965644836426, + "learning_rate": 2.713748735000723e-05, + "loss": 0.3054, + "step": 6257, + "teacher_loss": 0.28820982575416565 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.1729808896780014, + "learning_rate": 2.7141824490386005e-05, + "loss": 0.2324, + "step": 6258, + "teacher_loss": 0.23902226984500885 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.4296117424964905, + "learning_rate": 2.7146161630764783e-05, + "loss": 0.2808, + "step": 6259, + "teacher_loss": 0.2642960548400879 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.5139473080635071, + "learning_rate": 2.715049877114356e-05, + "loss": 0.3018, + "step": 6260, + "teacher_loss": 0.2782681882381439 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.29237133264541626, + "learning_rate": 2.7154835911522335e-05, + "loss": 0.2341, + "step": 6261, + "teacher_loss": 0.22762879729270935 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.46264809370040894, + "learning_rate": 2.7159173051901112e-05, + "loss": 0.2588, + "step": 6262, + "teacher_loss": 0.23618757724761963 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.6601729393005371, + "learning_rate": 2.716351019227989e-05, + "loss": 0.2651, + "step": 6263, + "teacher_loss": 0.22122298181056976 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.44145405292510986, + "learning_rate": 2.7167847332658668e-05, + "loss": 0.2789, + "step": 6264, + "teacher_loss": 0.2608572840690613 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.5389107465744019, + "learning_rate": 2.7172184473037445e-05, + "loss": 0.2824, + "step": 6265, + "teacher_loss": 0.25390520691871643 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.22644034028053284, + "learning_rate": 2.7176521613416223e-05, + "loss": 0.2139, + "step": 6266, + "teacher_loss": 0.2124623954296112 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.46022841334342957, + "learning_rate": 2.7180858753795e-05, + "loss": 0.2103, + "step": 6267, + "teacher_loss": 0.18256577849388123 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.555817723274231, + "learning_rate": 2.7185195894173778e-05, + "loss": 0.2953, + "step": 6268, + "teacher_loss": 0.2663660943508148 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.7040103673934937, + "learning_rate": 2.7189533034552552e-05, + "loss": 0.6986, + "step": 6269, + "teacher_loss": 0.6980404853820801 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.3168404698371887, + "learning_rate": 2.7193870174931327e-05, + "loss": 0.2468, + "step": 6270, + "teacher_loss": 0.23901307582855225 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.42855581641197205, + "learning_rate": 2.7198207315310104e-05, + "loss": 0.2102, + "step": 6271, + "teacher_loss": 0.18594574928283691 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.5309900641441345, + "learning_rate": 2.7202544455688882e-05, + "loss": 0.2689, + "step": 6272, + "teacher_loss": 0.23974266648292542 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.25031739473342896, + "learning_rate": 2.720688159606766e-05, + "loss": 0.2974, + "step": 6273, + "teacher_loss": 0.30264705419540405 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.7180373668670654, + "learning_rate": 2.7211218736446437e-05, + "loss": 0.2472, + "step": 6274, + "teacher_loss": 0.19484871625900269 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.3045549988746643, + "learning_rate": 2.7215555876825215e-05, + "loss": 0.2494, + "step": 6275, + "teacher_loss": 0.24322381615638733 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.42799344658851624, + "learning_rate": 2.7219893017203992e-05, + "loss": 0.2419, + "step": 6276, + "teacher_loss": 0.22117312252521515 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.8396463990211487, + "learning_rate": 2.722423015758277e-05, + "loss": 0.3573, + "step": 6277, + "teacher_loss": 0.3036838173866272 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.9512264728546143, + "learning_rate": 2.7228567297961547e-05, + "loss": 0.2852, + "step": 6278, + "teacher_loss": 0.2112230509519577 + }, + { + "compression_loss": 0.0, + "epoch": 1.13, + "label_loss": 0.46625006198883057, + "learning_rate": 2.7232904438340322e-05, + "loss": 0.2114, + "step": 6279, + "teacher_loss": 0.18309065699577332 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.47699829936027527, + "learning_rate": 2.7237241578719096e-05, + "loss": 0.3059, + "step": 6280, + "teacher_loss": 0.2868354320526123 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.3526540696620941, + "learning_rate": 2.7241578719097874e-05, + "loss": 0.1733, + "step": 6281, + "teacher_loss": 0.15342223644256592 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.340457558631897, + "learning_rate": 2.724591585947665e-05, + "loss": 0.3057, + "step": 6282, + "teacher_loss": 0.3018187880516052 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.18202698230743408, + "learning_rate": 2.725025299985543e-05, + "loss": 0.2616, + "step": 6283, + "teacher_loss": 0.2704620361328125 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.29284486174583435, + "learning_rate": 2.7254590140234206e-05, + "loss": 0.2263, + "step": 6284, + "teacher_loss": 0.21886911988258362 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.5829716920852661, + "learning_rate": 2.7258927280612984e-05, + "loss": 0.283, + "step": 6285, + "teacher_loss": 0.2496703565120697 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.8197914361953735, + "learning_rate": 2.726326442099176e-05, + "loss": 0.3171, + "step": 6286, + "teacher_loss": 0.26123958826065063 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.6244129538536072, + "learning_rate": 2.7267601561370536e-05, + "loss": 0.2476, + "step": 6287, + "teacher_loss": 0.2057790756225586 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.8621256351470947, + "learning_rate": 2.7271938701749314e-05, + "loss": 0.3386, + "step": 6288, + "teacher_loss": 0.2804277241230011 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.3177306056022644, + "learning_rate": 2.727627584212809e-05, + "loss": 0.2355, + "step": 6289, + "teacher_loss": 0.22641147673130035 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.4798209071159363, + "learning_rate": 2.728061298250687e-05, + "loss": 0.2302, + "step": 6290, + "teacher_loss": 0.2024591863155365 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.4372042417526245, + "learning_rate": 2.7284950122885643e-05, + "loss": 0.2285, + "step": 6291, + "teacher_loss": 0.20527349412441254 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.34870606660842896, + "learning_rate": 2.728928726326442e-05, + "loss": 0.2252, + "step": 6292, + "teacher_loss": 0.21147626638412476 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.27960658073425293, + "learning_rate": 2.7293624403643198e-05, + "loss": 0.1821, + "step": 6293, + "teacher_loss": 0.17124134302139282 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.6935462951660156, + "learning_rate": 2.7297961544021976e-05, + "loss": 0.406, + "step": 6294, + "teacher_loss": 0.3740018606185913 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.4411775469779968, + "learning_rate": 2.7302298684400754e-05, + "loss": 0.1909, + "step": 6295, + "teacher_loss": 0.1630869358778 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.23863573372364044, + "learning_rate": 2.7306635824779528e-05, + "loss": 0.265, + "step": 6296, + "teacher_loss": 0.267932653427124 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.4179011583328247, + "learning_rate": 2.7310972965158305e-05, + "loss": 0.5044, + "step": 6297, + "teacher_loss": 0.5139556527137756 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.3631821572780609, + "learning_rate": 2.7315310105537083e-05, + "loss": 0.2275, + "step": 6298, + "teacher_loss": 0.21240012347698212 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.4780872166156769, + "learning_rate": 2.731964724591586e-05, + "loss": 0.2341, + "step": 6299, + "teacher_loss": 0.2069939374923706 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.528892993927002, + "learning_rate": 2.7323984386294638e-05, + "loss": 0.288, + "step": 6300, + "teacher_loss": 0.2612582743167877 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.4992755651473999, + "learning_rate": 2.7328321526673416e-05, + "loss": 0.2545, + "step": 6301, + "teacher_loss": 0.22726961970329285 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.24839726090431213, + "learning_rate": 2.733265866705219e-05, + "loss": 0.1976, + "step": 6302, + "teacher_loss": 0.19200240075588226 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.6737362146377563, + "learning_rate": 2.7336995807430968e-05, + "loss": 0.359, + "step": 6303, + "teacher_loss": 0.3240630626678467 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.1802452802658081, + "learning_rate": 2.7341332947809745e-05, + "loss": 0.2328, + "step": 6304, + "teacher_loss": 0.23865258693695068 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.3792036175727844, + "learning_rate": 2.734567008818852e-05, + "loss": 0.2496, + "step": 6305, + "teacher_loss": 0.2351485937833786 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.7047542333602905, + "learning_rate": 2.7350007228567297e-05, + "loss": 0.2478, + "step": 6306, + "teacher_loss": 0.19703811407089233 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.38650083541870117, + "learning_rate": 2.7354344368946075e-05, + "loss": 0.2128, + "step": 6307, + "teacher_loss": 0.19349591434001923 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.2799943685531616, + "learning_rate": 2.7358681509324852e-05, + "loss": 0.2799, + "step": 6308, + "teacher_loss": 0.2798810601234436 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.5141726732254028, + "learning_rate": 2.736301864970363e-05, + "loss": 0.3894, + "step": 6309, + "teacher_loss": 0.3755737841129303 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.5539348721504211, + "learning_rate": 2.7367355790082408e-05, + "loss": 0.3395, + "step": 6310, + "teacher_loss": 0.3156867027282715 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.4410494565963745, + "learning_rate": 2.7371692930461185e-05, + "loss": 0.2387, + "step": 6311, + "teacher_loss": 0.21624797582626343 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 1.3120110034942627, + "learning_rate": 2.7376030070839963e-05, + "loss": 0.4003, + "step": 6312, + "teacher_loss": 0.298954039812088 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.7790467143058777, + "learning_rate": 2.7380367211218737e-05, + "loss": 0.2833, + "step": 6313, + "teacher_loss": 0.22821597754955292 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.42238277196884155, + "learning_rate": 2.738470435159751e-05, + "loss": 0.2331, + "step": 6314, + "teacher_loss": 0.2121065855026245 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.4535025954246521, + "learning_rate": 2.738904149197629e-05, + "loss": 0.2444, + "step": 6315, + "teacher_loss": 0.22115977108478546 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.15294498205184937, + "learning_rate": 2.7393378632355067e-05, + "loss": 0.1875, + "step": 6316, + "teacher_loss": 0.19134274125099182 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.37593281269073486, + "learning_rate": 2.7397715772733844e-05, + "loss": 0.2059, + "step": 6317, + "teacher_loss": 0.1870015561580658 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.30167967081069946, + "learning_rate": 2.7402052913112622e-05, + "loss": 0.2312, + "step": 6318, + "teacher_loss": 0.22331474721431732 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.39041733741760254, + "learning_rate": 2.74063900534914e-05, + "loss": 0.2816, + "step": 6319, + "teacher_loss": 0.26952141523361206 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.9085977077484131, + "learning_rate": 2.7410727193870177e-05, + "loss": 0.3891, + "step": 6320, + "teacher_loss": 0.33136987686157227 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.3323662281036377, + "learning_rate": 2.7415064334248955e-05, + "loss": 0.2464, + "step": 6321, + "teacher_loss": 0.2368084341287613 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.34673595428466797, + "learning_rate": 2.7419401474627732e-05, + "loss": 0.2072, + "step": 6322, + "teacher_loss": 0.19164225459098816 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.8913214206695557, + "learning_rate": 2.7423738615006507e-05, + "loss": 0.3504, + "step": 6323, + "teacher_loss": 0.2902667820453644 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.3220446705818176, + "learning_rate": 2.742807575538528e-05, + "loss": 0.2573, + "step": 6324, + "teacher_loss": 0.2501053810119629 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.6296714544296265, + "learning_rate": 2.743241289576406e-05, + "loss": 0.2786, + "step": 6325, + "teacher_loss": 0.23959791660308838 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.7199380397796631, + "learning_rate": 2.7436750036142836e-05, + "loss": 0.3585, + "step": 6326, + "teacher_loss": 0.3183591961860657 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.36631840467453003, + "learning_rate": 2.7441087176521614e-05, + "loss": 0.353, + "step": 6327, + "teacher_loss": 0.3515172004699707 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.4167264699935913, + "learning_rate": 2.744542431690039e-05, + "loss": 0.2074, + "step": 6328, + "teacher_loss": 0.18413621187210083 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.7122987508773804, + "learning_rate": 2.744976145727917e-05, + "loss": 0.2447, + "step": 6329, + "teacher_loss": 0.19270175695419312 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.31083378195762634, + "learning_rate": 2.7454098597657947e-05, + "loss": 0.28, + "step": 6330, + "teacher_loss": 0.27654772996902466 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.6642519235610962, + "learning_rate": 2.7458435738036724e-05, + "loss": 0.3909, + "step": 6331, + "teacher_loss": 0.3605444133281708 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.6344103813171387, + "learning_rate": 2.74627728784155e-05, + "loss": 0.2841, + "step": 6332, + "teacher_loss": 0.24523112177848816 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.4857063293457031, + "learning_rate": 2.7467110018794276e-05, + "loss": 0.3079, + "step": 6333, + "teacher_loss": 0.2880992293357849 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.7089545726776123, + "learning_rate": 2.7471447159173054e-05, + "loss": 0.2574, + "step": 6334, + "teacher_loss": 0.20726659893989563 + }, + { + "compression_loss": 0.0, + "epoch": 1.14, + "label_loss": 0.4617321193218231, + "learning_rate": 2.7475784299551828e-05, + "loss": 0.2639, + "step": 6335, + "teacher_loss": 0.2419673055410385 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.4766450822353363, + "learning_rate": 2.7480121439930606e-05, + "loss": 0.2375, + "step": 6336, + "teacher_loss": 0.21091046929359436 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.5361030697822571, + "learning_rate": 2.7484458580309383e-05, + "loss": 0.3004, + "step": 6337, + "teacher_loss": 0.2742132544517517 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.5607529282569885, + "learning_rate": 2.748879572068816e-05, + "loss": 0.2962, + "step": 6338, + "teacher_loss": 0.2667674720287323 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.4431915581226349, + "learning_rate": 2.749313286106694e-05, + "loss": 0.2619, + "step": 6339, + "teacher_loss": 0.24172131717205048 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.08878196775913239, + "learning_rate": 2.7497470001445713e-05, + "loss": 0.1881, + "step": 6340, + "teacher_loss": 0.19911187887191772 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.3350834548473358, + "learning_rate": 2.750180714182449e-05, + "loss": 0.2095, + "step": 6341, + "teacher_loss": 0.1955356001853943 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.5581668615341187, + "learning_rate": 2.7506144282203268e-05, + "loss": 0.2504, + "step": 6342, + "teacher_loss": 0.21615996956825256 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.22815459966659546, + "learning_rate": 2.7510481422582045e-05, + "loss": 0.1572, + "step": 6343, + "teacher_loss": 0.14934581518173218 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.27737295627593994, + "learning_rate": 2.7514818562960823e-05, + "loss": 0.1542, + "step": 6344, + "teacher_loss": 0.1405356526374817 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.3505832254886627, + "learning_rate": 2.75191557033396e-05, + "loss": 0.1634, + "step": 6345, + "teacher_loss": 0.142632395029068 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.45454078912734985, + "learning_rate": 2.7523492843718375e-05, + "loss": 0.2362, + "step": 6346, + "teacher_loss": 0.21190527081489563 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.5701397657394409, + "learning_rate": 2.7527829984097153e-05, + "loss": 0.2919, + "step": 6347, + "teacher_loss": 0.2609695792198181 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.6707603335380554, + "learning_rate": 2.753216712447593e-05, + "loss": 0.2165, + "step": 6348, + "teacher_loss": 0.16599249839782715 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.4907478094100952, + "learning_rate": 2.7536504264854704e-05, + "loss": 0.2503, + "step": 6349, + "teacher_loss": 0.22357916831970215 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.4619969129562378, + "learning_rate": 2.7540841405233482e-05, + "loss": 0.2638, + "step": 6350, + "teacher_loss": 0.24181979894638062 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.37157875299453735, + "learning_rate": 2.754517854561226e-05, + "loss": 0.1963, + "step": 6351, + "teacher_loss": 0.17678387463092804 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.6931254267692566, + "learning_rate": 2.7549515685991037e-05, + "loss": 0.3052, + "step": 6352, + "teacher_loss": 0.26204460859298706 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.2829945385456085, + "learning_rate": 2.7553852826369815e-05, + "loss": 0.1856, + "step": 6353, + "teacher_loss": 0.17473077774047852 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.2657064199447632, + "learning_rate": 2.7558189966748593e-05, + "loss": 0.2268, + "step": 6354, + "teacher_loss": 0.2224724441766739 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.24362745881080627, + "learning_rate": 2.756252710712737e-05, + "loss": 0.2257, + "step": 6355, + "teacher_loss": 0.223673477768898 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.74949049949646, + "learning_rate": 2.7566864247506144e-05, + "loss": 0.2682, + "step": 6356, + "teacher_loss": 0.21476420760154724 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.924878716468811, + "learning_rate": 2.7571201387884922e-05, + "loss": 0.2894, + "step": 6357, + "teacher_loss": 0.21879440546035767 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.46477210521698, + "learning_rate": 2.7575538528263696e-05, + "loss": 0.2589, + "step": 6358, + "teacher_loss": 0.2360476553440094 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.37256282567977905, + "learning_rate": 2.7579875668642474e-05, + "loss": 0.2644, + "step": 6359, + "teacher_loss": 0.2523536682128906 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.30953651666641235, + "learning_rate": 2.758421280902125e-05, + "loss": 0.2095, + "step": 6360, + "teacher_loss": 0.19843554496765137 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.41758018732070923, + "learning_rate": 2.758854994940003e-05, + "loss": 0.244, + "step": 6361, + "teacher_loss": 0.22469252347946167 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.48725467920303345, + "learning_rate": 2.7592887089778807e-05, + "loss": 0.2627, + "step": 6362, + "teacher_loss": 0.2377915382385254 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.5599921345710754, + "learning_rate": 2.7597224230157584e-05, + "loss": 0.2788, + "step": 6363, + "teacher_loss": 0.24754825234413147 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.30126145482063293, + "learning_rate": 2.7601561370536362e-05, + "loss": 0.166, + "step": 6364, + "teacher_loss": 0.15102456510066986 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.41831398010253906, + "learning_rate": 2.760589851091514e-05, + "loss": 0.2129, + "step": 6365, + "teacher_loss": 0.19006478786468506 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.34805721044540405, + "learning_rate": 2.7610235651293917e-05, + "loss": 0.2628, + "step": 6366, + "teacher_loss": 0.2533630430698395 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.6780663728713989, + "learning_rate": 2.7614572791672688e-05, + "loss": 0.2985, + "step": 6367, + "teacher_loss": 0.2562969923019409 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.31010255217552185, + "learning_rate": 2.7618909932051466e-05, + "loss": 0.2545, + "step": 6368, + "teacher_loss": 0.24829965829849243 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.11529463529586792, + "learning_rate": 2.7623247072430243e-05, + "loss": 0.274, + "step": 6369, + "teacher_loss": 0.29159271717071533 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.33310267329216003, + "learning_rate": 2.762758421280902e-05, + "loss": 0.1801, + "step": 6370, + "teacher_loss": 0.16311690211296082 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.21791435778141022, + "learning_rate": 2.76319213531878e-05, + "loss": 0.2104, + "step": 6371, + "teacher_loss": 0.20957419276237488 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.7558392286300659, + "learning_rate": 2.7636258493566576e-05, + "loss": 0.5143, + "step": 6372, + "teacher_loss": 0.48749488592147827 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.4173157513141632, + "learning_rate": 2.7640595633945354e-05, + "loss": 0.3341, + "step": 6373, + "teacher_loss": 0.32487431168556213 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.40799587965011597, + "learning_rate": 2.764493277432413e-05, + "loss": 0.3826, + "step": 6374, + "teacher_loss": 0.3797566592693329 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.44847017526626587, + "learning_rate": 2.764926991470291e-05, + "loss": 0.2812, + "step": 6375, + "teacher_loss": 0.26265496015548706 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.10728070884943008, + "learning_rate": 2.7653607055081683e-05, + "loss": 0.1657, + "step": 6376, + "teacher_loss": 0.17218562960624695 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.31788110733032227, + "learning_rate": 2.765794419546046e-05, + "loss": 0.1942, + "step": 6377, + "teacher_loss": 0.18046870827674866 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.7578850984573364, + "learning_rate": 2.7662281335839235e-05, + "loss": 0.2984, + "step": 6378, + "teacher_loss": 0.24736475944519043 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.5061662793159485, + "learning_rate": 2.7666618476218013e-05, + "loss": 0.3042, + "step": 6379, + "teacher_loss": 0.2817625105381012 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.38620129227638245, + "learning_rate": 2.767095561659679e-05, + "loss": 0.2618, + "step": 6380, + "teacher_loss": 0.24792803823947906 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.31748366355895996, + "learning_rate": 2.7675292756975568e-05, + "loss": 0.24, + "step": 6381, + "teacher_loss": 0.23133675754070282 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.40637660026550293, + "learning_rate": 2.7679629897354346e-05, + "loss": 0.3156, + "step": 6382, + "teacher_loss": 0.3055616319179535 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.2733974754810333, + "learning_rate": 2.7683967037733123e-05, + "loss": 0.2232, + "step": 6383, + "teacher_loss": 0.21762242913246155 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.3107895255088806, + "learning_rate": 2.7688304178111897e-05, + "loss": 0.207, + "step": 6384, + "teacher_loss": 0.19541393220424652 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.6396842002868652, + "learning_rate": 2.7692641318490675e-05, + "loss": 0.5034, + "step": 6385, + "teacher_loss": 0.48821765184402466 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.3159215450286865, + "learning_rate": 2.7696978458869453e-05, + "loss": 0.2052, + "step": 6386, + "teacher_loss": 0.19284874200820923 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.25341832637786865, + "learning_rate": 2.770131559924823e-05, + "loss": 0.2182, + "step": 6387, + "teacher_loss": 0.21430335938930511 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.869004487991333, + "learning_rate": 2.7705652739627008e-05, + "loss": 0.4049, + "step": 6388, + "teacher_loss": 0.3533214330673218 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.23382866382598877, + "learning_rate": 2.7709989880005782e-05, + "loss": 0.1245, + "step": 6389, + "teacher_loss": 0.11231796443462372 + }, + { + "compression_loss": 0.0, + "epoch": 1.15, + "label_loss": 0.3859778046607971, + "learning_rate": 2.771432702038456e-05, + "loss": 0.2162, + "step": 6390, + "teacher_loss": 0.19735205173492432 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.652147114276886, + "learning_rate": 2.7718664160763337e-05, + "loss": 0.3079, + "step": 6391, + "teacher_loss": 0.26961907744407654 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 1.125064730644226, + "learning_rate": 2.7723001301142115e-05, + "loss": 0.2868, + "step": 6392, + "teacher_loss": 0.19364196062088013 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.8060420751571655, + "learning_rate": 2.772733844152089e-05, + "loss": 0.3543, + "step": 6393, + "teacher_loss": 0.3040893077850342 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.47238391637802124, + "learning_rate": 2.7731675581899667e-05, + "loss": 0.2335, + "step": 6394, + "teacher_loss": 0.20696324110031128 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.3843284249305725, + "learning_rate": 2.7736012722278445e-05, + "loss": 0.3142, + "step": 6395, + "teacher_loss": 0.30645751953125 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.42373013496398926, + "learning_rate": 2.7740349862657222e-05, + "loss": 0.3323, + "step": 6396, + "teacher_loss": 0.32218581438064575 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.7030600309371948, + "learning_rate": 2.7744687003036e-05, + "loss": 0.3194, + "step": 6397, + "teacher_loss": 0.27676939964294434 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.5010223388671875, + "learning_rate": 2.7749024143414777e-05, + "loss": 0.2565, + "step": 6398, + "teacher_loss": 0.22930167615413666 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.7198001146316528, + "learning_rate": 2.7753361283793555e-05, + "loss": 0.2998, + "step": 6399, + "teacher_loss": 0.2531326115131378 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.41181817650794983, + "learning_rate": 2.775769842417233e-05, + "loss": 0.2249, + "step": 6400, + "teacher_loss": 0.20407897233963013 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.7670482397079468, + "learning_rate": 2.7762035564551107e-05, + "loss": 0.276, + "step": 6401, + "teacher_loss": 0.2214670479297638 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.5739279389381409, + "learning_rate": 2.776637270492988e-05, + "loss": 0.2865, + "step": 6402, + "teacher_loss": 0.2546163499355316 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.9249714612960815, + "learning_rate": 2.777070984530866e-05, + "loss": 0.804, + "step": 6403, + "teacher_loss": 0.7905073165893555 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.589565634727478, + "learning_rate": 2.7775046985687436e-05, + "loss": 0.3068, + "step": 6404, + "teacher_loss": 0.27539947628974915 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.5522127151489258, + "learning_rate": 2.7779384126066214e-05, + "loss": 0.3152, + "step": 6405, + "teacher_loss": 0.2889009118080139 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.42046046257019043, + "learning_rate": 2.778372126644499e-05, + "loss": 0.2388, + "step": 6406, + "teacher_loss": 0.21865659952163696 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.35354235768318176, + "learning_rate": 2.778805840682377e-05, + "loss": 0.2875, + "step": 6407, + "teacher_loss": 0.28016796708106995 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.4919174313545227, + "learning_rate": 2.7792395547202547e-05, + "loss": 0.2095, + "step": 6408, + "teacher_loss": 0.17809349298477173 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.25068795680999756, + "learning_rate": 2.7796732687581324e-05, + "loss": 0.2386, + "step": 6409, + "teacher_loss": 0.23725396394729614 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 1.0543603897094727, + "learning_rate": 2.7801069827960102e-05, + "loss": 0.4795, + "step": 6410, + "teacher_loss": 0.41560447216033936 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.4111000895500183, + "learning_rate": 2.7805406968338873e-05, + "loss": 0.2818, + "step": 6411, + "teacher_loss": 0.2674700915813446 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.35354477167129517, + "learning_rate": 2.780974410871765e-05, + "loss": 0.2229, + "step": 6412, + "teacher_loss": 0.2083551287651062 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.34267279505729675, + "learning_rate": 2.7814081249096428e-05, + "loss": 0.2741, + "step": 6413, + "teacher_loss": 0.26642781496047974 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.3548782467842102, + "learning_rate": 2.7818418389475206e-05, + "loss": 0.2437, + "step": 6414, + "teacher_loss": 0.23138342797756195 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.2918523848056793, + "learning_rate": 2.7822755529853983e-05, + "loss": 0.2586, + "step": 6415, + "teacher_loss": 0.25490593910217285 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.4233061969280243, + "learning_rate": 2.782709267023276e-05, + "loss": 0.3156, + "step": 6416, + "teacher_loss": 0.3036794066429138 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.4841456413269043, + "learning_rate": 2.783142981061154e-05, + "loss": 0.2942, + "step": 6417, + "teacher_loss": 0.273131400346756 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.4677685499191284, + "learning_rate": 2.7835766950990316e-05, + "loss": 0.2407, + "step": 6418, + "teacher_loss": 0.2155158817768097 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.8999346494674683, + "learning_rate": 2.7840104091369094e-05, + "loss": 0.2721, + "step": 6419, + "teacher_loss": 0.2022942304611206 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.44143545627593994, + "learning_rate": 2.7844441231747868e-05, + "loss": 0.2192, + "step": 6420, + "teacher_loss": 0.19446223974227905 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.2656756043434143, + "learning_rate": 2.7848778372126646e-05, + "loss": 0.1776, + "step": 6421, + "teacher_loss": 0.16781297326087952 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.3032439053058624, + "learning_rate": 2.785311551250542e-05, + "loss": 0.2425, + "step": 6422, + "teacher_loss": 0.23575055599212646 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.6015926003456116, + "learning_rate": 2.7857452652884198e-05, + "loss": 0.3123, + "step": 6423, + "teacher_loss": 0.2801334261894226 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.5455918908119202, + "learning_rate": 2.7861789793262975e-05, + "loss": 0.4244, + "step": 6424, + "teacher_loss": 0.4109860062599182 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.12163172662258148, + "learning_rate": 2.7866126933641753e-05, + "loss": 0.2045, + "step": 6425, + "teacher_loss": 0.21372094750404358 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.36771732568740845, + "learning_rate": 2.787046407402053e-05, + "loss": 0.2344, + "step": 6426, + "teacher_loss": 0.2195802927017212 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.45391595363616943, + "learning_rate": 2.7874801214399308e-05, + "loss": 0.2415, + "step": 6427, + "teacher_loss": 0.2179490029811859 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.4787466824054718, + "learning_rate": 2.7879138354778082e-05, + "loss": 0.203, + "step": 6428, + "teacher_loss": 0.17241042852401733 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.45165571570396423, + "learning_rate": 2.788347549515686e-05, + "loss": 0.2606, + "step": 6429, + "teacher_loss": 0.2393563687801361 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.4721669852733612, + "learning_rate": 2.7887812635535638e-05, + "loss": 0.2889, + "step": 6430, + "teacher_loss": 0.2685520648956299 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.6080912351608276, + "learning_rate": 2.7892149775914415e-05, + "loss": 0.2955, + "step": 6431, + "teacher_loss": 0.26077860593795776 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.38433215022087097, + "learning_rate": 2.7896486916293193e-05, + "loss": 0.2592, + "step": 6432, + "teacher_loss": 0.24534134566783905 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.30968979001045227, + "learning_rate": 2.7900824056671967e-05, + "loss": 0.1914, + "step": 6433, + "teacher_loss": 0.17823836207389832 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.40799680352211, + "learning_rate": 2.7905161197050745e-05, + "loss": 0.2744, + "step": 6434, + "teacher_loss": 0.25960564613342285 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.825791597366333, + "learning_rate": 2.7909498337429522e-05, + "loss": 0.2314, + "step": 6435, + "teacher_loss": 0.16538411378860474 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.7094988822937012, + "learning_rate": 2.79138354778083e-05, + "loss": 0.4116, + "step": 6436, + "teacher_loss": 0.3785497546195984 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.4178553521633148, + "learning_rate": 2.7918172618187074e-05, + "loss": 0.3332, + "step": 6437, + "teacher_loss": 0.3238462805747986 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.528026819229126, + "learning_rate": 2.7922509758565852e-05, + "loss": 0.3393, + "step": 6438, + "teacher_loss": 0.31837016344070435 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.5983232855796814, + "learning_rate": 2.792684689894463e-05, + "loss": 0.426, + "step": 6439, + "teacher_loss": 0.40688955783843994 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.303390771150589, + "learning_rate": 2.7931184039323407e-05, + "loss": 0.3675, + "step": 6440, + "teacher_loss": 0.3746604919433594 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.4176335632801056, + "learning_rate": 2.7935521179702185e-05, + "loss": 0.3742, + "step": 6441, + "teacher_loss": 0.3694085478782654 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.5791257619857788, + "learning_rate": 2.7939858320080962e-05, + "loss": 0.1743, + "step": 6442, + "teacher_loss": 0.1293712556362152 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.34995460510253906, + "learning_rate": 2.794419546045974e-05, + "loss": 0.2539, + "step": 6443, + "teacher_loss": 0.24321651458740234 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.5420399904251099, + "learning_rate": 2.7948532600838514e-05, + "loss": 0.2735, + "step": 6444, + "teacher_loss": 0.2436596304178238 + }, + { + "compression_loss": 0.0, + "epoch": 1.16, + "label_loss": 0.2640339732170105, + "learning_rate": 2.7952869741217292e-05, + "loss": 0.2675, + "step": 6445, + "teacher_loss": 0.26785755157470703 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.34202006459236145, + "learning_rate": 2.7957206881596066e-05, + "loss": 0.2812, + "step": 6446, + "teacher_loss": 0.2744370102882385 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.37827402353286743, + "learning_rate": 2.7961544021974844e-05, + "loss": 0.2138, + "step": 6447, + "teacher_loss": 0.19549629092216492 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.4497312307357788, + "learning_rate": 2.796588116235362e-05, + "loss": 0.1989, + "step": 6448, + "teacher_loss": 0.17098847031593323 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.4654259979724884, + "learning_rate": 2.79702183027324e-05, + "loss": 0.223, + "step": 6449, + "teacher_loss": 0.19611376523971558 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.41498297452926636, + "learning_rate": 2.7974555443111177e-05, + "loss": 0.3033, + "step": 6450, + "teacher_loss": 0.2908935248851776 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.3507779538631439, + "learning_rate": 2.7978892583489954e-05, + "loss": 0.2721, + "step": 6451, + "teacher_loss": 0.26335179805755615 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.7619916796684265, + "learning_rate": 2.7983229723868732e-05, + "loss": 0.2607, + "step": 6452, + "teacher_loss": 0.20505085587501526 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.49838903546333313, + "learning_rate": 2.798756686424751e-05, + "loss": 0.2618, + "step": 6453, + "teacher_loss": 0.2355421483516693 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.3548928499221802, + "learning_rate": 2.7991904004626284e-05, + "loss": 0.2126, + "step": 6454, + "teacher_loss": 0.19674894213676453 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.3909909725189209, + "learning_rate": 2.7996241145005058e-05, + "loss": 0.1766, + "step": 6455, + "teacher_loss": 0.15283125638961792 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.6628071665763855, + "learning_rate": 2.8000578285383835e-05, + "loss": 0.3085, + "step": 6456, + "teacher_loss": 0.2690792381763458 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.43428581953048706, + "learning_rate": 2.8004915425762613e-05, + "loss": 0.2508, + "step": 6457, + "teacher_loss": 0.23045343160629272 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.3233993649482727, + "learning_rate": 2.800925256614139e-05, + "loss": 0.2795, + "step": 6458, + "teacher_loss": 0.2745901644229889 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.2688933312892914, + "learning_rate": 2.801358970652017e-05, + "loss": 0.2246, + "step": 6459, + "teacher_loss": 0.21971148252487183 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.5207000374794006, + "learning_rate": 2.8017926846898946e-05, + "loss": 0.2268, + "step": 6460, + "teacher_loss": 0.19416257739067078 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.16202065348625183, + "learning_rate": 2.8022263987277724e-05, + "loss": 0.2715, + "step": 6461, + "teacher_loss": 0.28370770812034607 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 1.190521001815796, + "learning_rate": 2.80266011276565e-05, + "loss": 0.3236, + "step": 6462, + "teacher_loss": 0.22726945579051971 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.35429704189300537, + "learning_rate": 2.803093826803528e-05, + "loss": 0.2654, + "step": 6463, + "teacher_loss": 0.25551217794418335 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.4185071587562561, + "learning_rate": 2.8035275408414053e-05, + "loss": 0.2358, + "step": 6464, + "teacher_loss": 0.21551606059074402 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.37365108728408813, + "learning_rate": 2.8039612548792827e-05, + "loss": 0.2347, + "step": 6465, + "teacher_loss": 0.21926668286323547 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.4165058434009552, + "learning_rate": 2.8043949689171605e-05, + "loss": 0.2196, + "step": 6466, + "teacher_loss": 0.1976912021636963 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.4885011613368988, + "learning_rate": 2.8048286829550383e-05, + "loss": 0.22, + "step": 6467, + "teacher_loss": 0.19020432233810425 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.7185476422309875, + "learning_rate": 2.805262396992916e-05, + "loss": 0.2972, + "step": 6468, + "teacher_loss": 0.25042128562927246 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.861733078956604, + "learning_rate": 2.8056961110307938e-05, + "loss": 0.3242, + "step": 6469, + "teacher_loss": 0.26442667841911316 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.6002152562141418, + "learning_rate": 2.8061298250686715e-05, + "loss": 0.2609, + "step": 6470, + "teacher_loss": 0.22321179509162903 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.5005057454109192, + "learning_rate": 2.8065635391065493e-05, + "loss": 0.3142, + "step": 6471, + "teacher_loss": 0.2934834957122803 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.3781481683254242, + "learning_rate": 2.806997253144427e-05, + "loss": 0.3115, + "step": 6472, + "teacher_loss": 0.30409160256385803 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.6160641312599182, + "learning_rate": 2.8074309671823045e-05, + "loss": 0.2426, + "step": 6473, + "teacher_loss": 0.20109286904335022 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.72745680809021, + "learning_rate": 2.8078646812201822e-05, + "loss": 0.2808, + "step": 6474, + "teacher_loss": 0.23116064071655273 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.6917792558670044, + "learning_rate": 2.80829839525806e-05, + "loss": 0.3515, + "step": 6475, + "teacher_loss": 0.31372568011283875 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.565274715423584, + "learning_rate": 2.8087321092959374e-05, + "loss": 0.313, + "step": 6476, + "teacher_loss": 0.2849982976913452 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.19094763696193695, + "learning_rate": 2.8091658233338152e-05, + "loss": 0.1602, + "step": 6477, + "teacher_loss": 0.15672853589057922 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.4896821975708008, + "learning_rate": 2.809599537371693e-05, + "loss": 0.3127, + "step": 6478, + "teacher_loss": 0.29306167364120483 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.46105965971946716, + "learning_rate": 2.8100332514095707e-05, + "loss": 0.2, + "step": 6479, + "teacher_loss": 0.17096011340618134 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.33770179748535156, + "learning_rate": 2.8104669654474485e-05, + "loss": 0.2038, + "step": 6480, + "teacher_loss": 0.18888059258460999 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.3461548686027527, + "learning_rate": 2.810900679485326e-05, + "loss": 0.2147, + "step": 6481, + "teacher_loss": 0.20009593665599823 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.1822448968887329, + "learning_rate": 2.8113343935232037e-05, + "loss": 0.3037, + "step": 6482, + "teacher_loss": 0.3172001838684082 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.4877607822418213, + "learning_rate": 2.8117681075610814e-05, + "loss": 0.2831, + "step": 6483, + "teacher_loss": 0.26035308837890625 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.5004500150680542, + "learning_rate": 2.8122018215989592e-05, + "loss": 0.2864, + "step": 6484, + "teacher_loss": 0.2626700699329376 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.5033208131790161, + "learning_rate": 2.812635535636837e-05, + "loss": 0.2337, + "step": 6485, + "teacher_loss": 0.20375798642635345 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.6915962100028992, + "learning_rate": 2.8130692496747147e-05, + "loss": 0.4521, + "step": 6486, + "teacher_loss": 0.4255412518978119 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.655550479888916, + "learning_rate": 2.813502963712592e-05, + "loss": 0.4921, + "step": 6487, + "teacher_loss": 0.4739099144935608 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.23684318363666534, + "learning_rate": 2.81393667775047e-05, + "loss": 0.196, + "step": 6488, + "teacher_loss": 0.1914912760257721 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.7285459637641907, + "learning_rate": 2.8143703917883477e-05, + "loss": 0.4511, + "step": 6489, + "teacher_loss": 0.42024916410446167 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.2557194232940674, + "learning_rate": 2.814804105826225e-05, + "loss": 0.2404, + "step": 6490, + "teacher_loss": 0.23871949315071106 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.48193615674972534, + "learning_rate": 2.815237819864103e-05, + "loss": 0.2937, + "step": 6491, + "teacher_loss": 0.2728140950202942 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.36556559801101685, + "learning_rate": 2.8156715339019806e-05, + "loss": 0.2347, + "step": 6492, + "teacher_loss": 0.22018060088157654 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.5264362692832947, + "learning_rate": 2.8161052479398584e-05, + "loss": 0.2476, + "step": 6493, + "teacher_loss": 0.21665555238723755 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.5201779007911682, + "learning_rate": 2.816538961977736e-05, + "loss": 0.2926, + "step": 6494, + "teacher_loss": 0.26726996898651123 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.6681616306304932, + "learning_rate": 2.816972676015614e-05, + "loss": 0.2924, + "step": 6495, + "teacher_loss": 0.2507016658782959 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.49516937136650085, + "learning_rate": 2.8174063900534917e-05, + "loss": 0.3344, + "step": 6496, + "teacher_loss": 0.31656545400619507 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.17480704188346863, + "learning_rate": 2.8178401040913694e-05, + "loss": 0.2306, + "step": 6497, + "teacher_loss": 0.23676884174346924 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.428730845451355, + "learning_rate": 2.818273818129247e-05, + "loss": 0.2939, + "step": 6498, + "teacher_loss": 0.2789068818092346 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.17639249563217163, + "learning_rate": 2.8187075321671243e-05, + "loss": 0.1406, + "step": 6499, + "teacher_loss": 0.13656772673130035 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.42274466156959534, + "learning_rate": 2.819141246205002e-05, + "loss": 0.2263, + "step": 6500, + "teacher_loss": 0.204467311501503 + }, + { + "epoch": 1.17, + "eval_exact_match": 79.63103122043519, + "eval_f1": 87.0633606447321, + "step": 6500 + }, + { + "compression_loss": 0.0, + "epoch": 1.17, + "label_loss": 0.4441624879837036, + "learning_rate": 2.8195749602428798e-05, + "loss": 0.3149, + "step": 6501, + "teacher_loss": 0.30049407482147217 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.3862485885620117, + "learning_rate": 2.8200086742807576e-05, + "loss": 0.222, + "step": 6502, + "teacher_loss": 0.2037688046693802 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.27982306480407715, + "learning_rate": 2.8204423883186353e-05, + "loss": 0.2054, + "step": 6503, + "teacher_loss": 0.19711607694625854 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.4871073365211487, + "learning_rate": 2.820876102356513e-05, + "loss": 0.2411, + "step": 6504, + "teacher_loss": 0.21380865573883057 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.36022287607192993, + "learning_rate": 2.821309816394391e-05, + "loss": 0.3466, + "step": 6505, + "teacher_loss": 0.3450416326522827 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.4648471474647522, + "learning_rate": 2.8217435304322686e-05, + "loss": 0.304, + "step": 6506, + "teacher_loss": 0.28616178035736084 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.22943875193595886, + "learning_rate": 2.8221772444701464e-05, + "loss": 0.2092, + "step": 6507, + "teacher_loss": 0.20695193111896515 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.617201030254364, + "learning_rate": 2.8226109585080238e-05, + "loss": 0.2288, + "step": 6508, + "teacher_loss": 0.1856650412082672 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.8473166227340698, + "learning_rate": 2.8230446725459012e-05, + "loss": 0.3197, + "step": 6509, + "teacher_loss": 0.2610397934913635 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.38283032178878784, + "learning_rate": 2.823478386583779e-05, + "loss": 0.163, + "step": 6510, + "teacher_loss": 0.13860836625099182 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.5371262431144714, + "learning_rate": 2.8239121006216567e-05, + "loss": 0.2377, + "step": 6511, + "teacher_loss": 0.20439651608467102 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.4067457914352417, + "learning_rate": 2.8243458146595345e-05, + "loss": 0.2463, + "step": 6512, + "teacher_loss": 0.22847366333007812 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.3021693825721741, + "learning_rate": 2.8247795286974123e-05, + "loss": 0.2207, + "step": 6513, + "teacher_loss": 0.2116512954235077 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.11785378307104111, + "learning_rate": 2.82521324273529e-05, + "loss": 0.1479, + "step": 6514, + "teacher_loss": 0.1512056589126587 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.18052011728286743, + "learning_rate": 2.8256469567731678e-05, + "loss": 0.1785, + "step": 6515, + "teacher_loss": 0.178291916847229 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.5591962337493896, + "learning_rate": 2.8260806708110456e-05, + "loss": 0.2511, + "step": 6516, + "teacher_loss": 0.21683087944984436 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.7162905931472778, + "learning_rate": 2.826514384848923e-05, + "loss": 0.2837, + "step": 6517, + "teacher_loss": 0.23564772307872772 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.3718409538269043, + "learning_rate": 2.8269480988868007e-05, + "loss": 0.2447, + "step": 6518, + "teacher_loss": 0.23061034083366394 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.7232087850570679, + "learning_rate": 2.8273818129246785e-05, + "loss": 0.2946, + "step": 6519, + "teacher_loss": 0.24693775177001953 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.5598447918891907, + "learning_rate": 2.827815526962556e-05, + "loss": 0.2183, + "step": 6520, + "teacher_loss": 0.18029527366161346 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.5116032958030701, + "learning_rate": 2.8282492410004337e-05, + "loss": 0.3776, + "step": 6521, + "teacher_loss": 0.36270803213119507 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.6097493171691895, + "learning_rate": 2.8286829550383114e-05, + "loss": 0.2895, + "step": 6522, + "teacher_loss": 0.25388604402542114 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.48980289697647095, + "learning_rate": 2.8291166690761892e-05, + "loss": 0.3296, + "step": 6523, + "teacher_loss": 0.3118417263031006 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.3335012197494507, + "learning_rate": 2.829550383114067e-05, + "loss": 0.2726, + "step": 6524, + "teacher_loss": 0.26579126715660095 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.25868645310401917, + "learning_rate": 2.8299840971519444e-05, + "loss": 0.2219, + "step": 6525, + "teacher_loss": 0.21784977614879608 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 1.0642547607421875, + "learning_rate": 2.830417811189822e-05, + "loss": 0.3549, + "step": 6526, + "teacher_loss": 0.27602994441986084 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.30355343222618103, + "learning_rate": 2.8308515252277e-05, + "loss": 0.1674, + "step": 6527, + "teacher_loss": 0.15225505828857422 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.6845999360084534, + "learning_rate": 2.8312852392655777e-05, + "loss": 0.2762, + "step": 6528, + "teacher_loss": 0.2308567762374878 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.27771762013435364, + "learning_rate": 2.8317189533034554e-05, + "loss": 0.2462, + "step": 6529, + "teacher_loss": 0.24269534647464752 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.21253381669521332, + "learning_rate": 2.8321526673413332e-05, + "loss": 0.2491, + "step": 6530, + "teacher_loss": 0.2531481683254242 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.16174480319023132, + "learning_rate": 2.8325863813792106e-05, + "loss": 0.1774, + "step": 6531, + "teacher_loss": 0.17912143468856812 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.32329800724983215, + "learning_rate": 2.8330200954170884e-05, + "loss": 0.4324, + "step": 6532, + "teacher_loss": 0.4445135295391083 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.3365422785282135, + "learning_rate": 2.833453809454966e-05, + "loss": 0.3073, + "step": 6533, + "teacher_loss": 0.3040579855442047 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.46901994943618774, + "learning_rate": 2.8338875234928436e-05, + "loss": 0.3771, + "step": 6534, + "teacher_loss": 0.3668709993362427 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.2946434020996094, + "learning_rate": 2.8343212375307213e-05, + "loss": 0.3345, + "step": 6535, + "teacher_loss": 0.3389154076576233 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.3871268033981323, + "learning_rate": 2.834754951568599e-05, + "loss": 0.1954, + "step": 6536, + "teacher_loss": 0.17414361238479614 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.4594939351081848, + "learning_rate": 2.835188665606477e-05, + "loss": 0.2725, + "step": 6537, + "teacher_loss": 0.2517177164554596 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.2386641502380371, + "learning_rate": 2.8356223796443546e-05, + "loss": 0.2476, + "step": 6538, + "teacher_loss": 0.24860814213752747 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.330252468585968, + "learning_rate": 2.8360560936822324e-05, + "loss": 0.2066, + "step": 6539, + "teacher_loss": 0.192832350730896 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.47183674573898315, + "learning_rate": 2.83648980772011e-05, + "loss": 0.2883, + "step": 6540, + "teacher_loss": 0.26787739992141724 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.37910300493240356, + "learning_rate": 2.836923521757988e-05, + "loss": 0.2663, + "step": 6541, + "teacher_loss": 0.2537977695465088 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.9918789863586426, + "learning_rate": 2.8373572357958653e-05, + "loss": 0.597, + "step": 6542, + "teacher_loss": 0.5531030893325806 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.6531774997711182, + "learning_rate": 2.8377909498337428e-05, + "loss": 0.2253, + "step": 6543, + "teacher_loss": 0.17774729430675507 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.4606553912162781, + "learning_rate": 2.8382246638716205e-05, + "loss": 0.2498, + "step": 6544, + "teacher_loss": 0.22633501887321472 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 1.1842460632324219, + "learning_rate": 2.8386583779094983e-05, + "loss": 0.5024, + "step": 6545, + "teacher_loss": 0.42660731077194214 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 1.2034978866577148, + "learning_rate": 2.839092091947376e-05, + "loss": 0.3708, + "step": 6546, + "teacher_loss": 0.27823951840400696 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.4330669045448303, + "learning_rate": 2.8395258059852538e-05, + "loss": 0.3111, + "step": 6547, + "teacher_loss": 0.2975241243839264 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.5385148525238037, + "learning_rate": 2.8399595200231316e-05, + "loss": 0.2926, + "step": 6548, + "teacher_loss": 0.26525411009788513 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.5660387873649597, + "learning_rate": 2.8403932340610093e-05, + "loss": 0.2853, + "step": 6549, + "teacher_loss": 0.2540532946586609 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.20650511980056763, + "learning_rate": 2.840826948098887e-05, + "loss": 0.1709, + "step": 6550, + "teacher_loss": 0.1669941395521164 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.7499240636825562, + "learning_rate": 2.841260662136765e-05, + "loss": 0.3219, + "step": 6551, + "teacher_loss": 0.27432459592819214 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.616345226764679, + "learning_rate": 2.8416943761746423e-05, + "loss": 0.3452, + "step": 6552, + "teacher_loss": 0.31503725051879883 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.5716365575790405, + "learning_rate": 2.8421280902125197e-05, + "loss": 0.6612, + "step": 6553, + "teacher_loss": 0.6711024045944214 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.39616528153419495, + "learning_rate": 2.8425618042503975e-05, + "loss": 0.2395, + "step": 6554, + "teacher_loss": 0.22213412821292877 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.580011248588562, + "learning_rate": 2.8429955182882752e-05, + "loss": 0.4085, + "step": 6555, + "teacher_loss": 0.38944506645202637 + }, + { + "compression_loss": 0.0, + "epoch": 1.18, + "label_loss": 0.3728428781032562, + "learning_rate": 2.843429232326153e-05, + "loss": 0.2272, + "step": 6556, + "teacher_loss": 0.21099919080734253 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.2690393030643463, + "learning_rate": 2.8438629463640308e-05, + "loss": 0.3013, + "step": 6557, + "teacher_loss": 0.3049301505088806 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.4846295416355133, + "learning_rate": 2.8442966604019085e-05, + "loss": 0.3562, + "step": 6558, + "teacher_loss": 0.3419593572616577 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.4713246822357178, + "learning_rate": 2.8447303744397863e-05, + "loss": 0.2438, + "step": 6559, + "teacher_loss": 0.21851110458374023 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 1.3691496849060059, + "learning_rate": 2.845164088477664e-05, + "loss": 0.6125, + "step": 6560, + "teacher_loss": 0.5284755229949951 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.514237642288208, + "learning_rate": 2.8455978025155415e-05, + "loss": 0.4164, + "step": 6561, + "teacher_loss": 0.4055802822113037 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.2566221356391907, + "learning_rate": 2.8460315165534192e-05, + "loss": 0.2939, + "step": 6562, + "teacher_loss": 0.2980666756629944 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.32291653752326965, + "learning_rate": 2.846465230591297e-05, + "loss": 0.2083, + "step": 6563, + "teacher_loss": 0.1955586075782776 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.8036931753158569, + "learning_rate": 2.8468989446291744e-05, + "loss": 0.3654, + "step": 6564, + "teacher_loss": 0.31672126054763794 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.4677231013774872, + "learning_rate": 2.8473326586670522e-05, + "loss": 0.3455, + "step": 6565, + "teacher_loss": 0.3319653868675232 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.5142677426338196, + "learning_rate": 2.84776637270493e-05, + "loss": 0.3033, + "step": 6566, + "teacher_loss": 0.2798454463481903 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.2735411524772644, + "learning_rate": 2.8482000867428077e-05, + "loss": 0.1833, + "step": 6567, + "teacher_loss": 0.17324435710906982 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.31275707483291626, + "learning_rate": 2.8486338007806855e-05, + "loss": 0.2251, + "step": 6568, + "teacher_loss": 0.21538996696472168 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.4707309305667877, + "learning_rate": 2.849067514818563e-05, + "loss": 0.3363, + "step": 6569, + "teacher_loss": 0.3213208019733429 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.7752500772476196, + "learning_rate": 2.8495012288564406e-05, + "loss": 0.304, + "step": 6570, + "teacher_loss": 0.2516320049762726 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.2664434015750885, + "learning_rate": 2.8499349428943184e-05, + "loss": 0.2395, + "step": 6571, + "teacher_loss": 0.23646846413612366 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.41426560282707214, + "learning_rate": 2.8503686569321962e-05, + "loss": 0.4227, + "step": 6572, + "teacher_loss": 0.42368924617767334 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.5273882746696472, + "learning_rate": 2.850802370970074e-05, + "loss": 0.2277, + "step": 6573, + "teacher_loss": 0.19439566135406494 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.5038734674453735, + "learning_rate": 2.8512360850079514e-05, + "loss": 0.2748, + "step": 6574, + "teacher_loss": 0.24934571981430054 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.729423463344574, + "learning_rate": 2.851669799045829e-05, + "loss": 0.3781, + "step": 6575, + "teacher_loss": 0.3391048312187195 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.5298543572425842, + "learning_rate": 2.852103513083707e-05, + "loss": 0.3803, + "step": 6576, + "teacher_loss": 0.3637186288833618 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.3247554898262024, + "learning_rate": 2.8525372271215846e-05, + "loss": 0.2286, + "step": 6577, + "teacher_loss": 0.21786251664161682 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.5020084381103516, + "learning_rate": 2.852970941159462e-05, + "loss": 0.2289, + "step": 6578, + "teacher_loss": 0.19850805401802063 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.7192646861076355, + "learning_rate": 2.8534046551973398e-05, + "loss": 0.3949, + "step": 6579, + "teacher_loss": 0.35884955525398254 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.18281190097332, + "learning_rate": 2.8538383692352176e-05, + "loss": 0.1863, + "step": 6580, + "teacher_loss": 0.1866425722837448 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.251709908246994, + "learning_rate": 2.8542720832730954e-05, + "loss": 0.1922, + "step": 6581, + "teacher_loss": 0.18557637929916382 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.5090420842170715, + "learning_rate": 2.854705797310973e-05, + "loss": 0.2757, + "step": 6582, + "teacher_loss": 0.2497730851173401 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.3213367462158203, + "learning_rate": 2.855139511348851e-05, + "loss": 0.2323, + "step": 6583, + "teacher_loss": 0.22244617342948914 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.34558022022247314, + "learning_rate": 2.8555732253867286e-05, + "loss": 0.2649, + "step": 6584, + "teacher_loss": 0.2559163570404053 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.21683089435100555, + "learning_rate": 2.856006939424606e-05, + "loss": 0.2107, + "step": 6585, + "teacher_loss": 0.21000029146671295 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.6775074005126953, + "learning_rate": 2.8564406534624838e-05, + "loss": 0.2808, + "step": 6586, + "teacher_loss": 0.2367245852947235 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.319302499294281, + "learning_rate": 2.8568743675003612e-05, + "loss": 0.3563, + "step": 6587, + "teacher_loss": 0.3603971302509308 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.4299470782279968, + "learning_rate": 2.857308081538239e-05, + "loss": 0.2441, + "step": 6588, + "teacher_loss": 0.2234463095664978 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.28330114483833313, + "learning_rate": 2.8577417955761168e-05, + "loss": 0.2017, + "step": 6589, + "teacher_loss": 0.1926369071006775 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.2400030493736267, + "learning_rate": 2.8581755096139945e-05, + "loss": 0.2206, + "step": 6590, + "teacher_loss": 0.21847641468048096 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.28033968806266785, + "learning_rate": 2.8586092236518723e-05, + "loss": 0.1746, + "step": 6591, + "teacher_loss": 0.16286763548851013 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.546729326248169, + "learning_rate": 2.85904293768975e-05, + "loss": 0.1642, + "step": 6592, + "teacher_loss": 0.12164296209812164 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.36625936627388, + "learning_rate": 2.8594766517276278e-05, + "loss": 0.268, + "step": 6593, + "teacher_loss": 0.257112979888916 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.4279859960079193, + "learning_rate": 2.8599103657655056e-05, + "loss": 0.2416, + "step": 6594, + "teacher_loss": 0.22083792090415955 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.5370991826057434, + "learning_rate": 2.8603440798033833e-05, + "loss": 0.2155, + "step": 6595, + "teacher_loss": 0.17976446449756622 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.4296658933162689, + "learning_rate": 2.8607777938412604e-05, + "loss": 0.213, + "step": 6596, + "teacher_loss": 0.18888802826404572 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.3599996566772461, + "learning_rate": 2.8612115078791382e-05, + "loss": 0.3225, + "step": 6597, + "teacher_loss": 0.3183194398880005 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.7844080328941345, + "learning_rate": 2.861645221917016e-05, + "loss": 0.3243, + "step": 6598, + "teacher_loss": 0.27320629358291626 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.36250829696655273, + "learning_rate": 2.8620789359548937e-05, + "loss": 0.2214, + "step": 6599, + "teacher_loss": 0.20575733482837677 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.26487189531326294, + "learning_rate": 2.8625126499927715e-05, + "loss": 0.2644, + "step": 6600, + "teacher_loss": 0.26434916257858276 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.7904253005981445, + "learning_rate": 2.8629463640306492e-05, + "loss": 0.3388, + "step": 6601, + "teacher_loss": 0.2886185050010681 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.6863570213317871, + "learning_rate": 2.863380078068527e-05, + "loss": 0.3363, + "step": 6602, + "teacher_loss": 0.29735904932022095 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.3948451578617096, + "learning_rate": 2.8638137921064048e-05, + "loss": 0.2276, + "step": 6603, + "teacher_loss": 0.2090112417936325 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.6098463535308838, + "learning_rate": 2.8642475061442825e-05, + "loss": 0.3182, + "step": 6604, + "teacher_loss": 0.28574201464653015 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.5835431218147278, + "learning_rate": 2.86468122018216e-05, + "loss": 0.2352, + "step": 6605, + "teacher_loss": 0.19644707441329956 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.4386568069458008, + "learning_rate": 2.8651149342200377e-05, + "loss": 0.2379, + "step": 6606, + "teacher_loss": 0.21559371054172516 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.3548958897590637, + "learning_rate": 2.865548648257915e-05, + "loss": 0.3312, + "step": 6607, + "teacher_loss": 0.32857823371887207 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.6731530427932739, + "learning_rate": 2.865982362295793e-05, + "loss": 0.2753, + "step": 6608, + "teacher_loss": 0.23104353249073029 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.10890144854784012, + "learning_rate": 2.8664160763336707e-05, + "loss": 0.158, + "step": 6609, + "teacher_loss": 0.16347917914390564 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.8006898164749146, + "learning_rate": 2.8668497903715484e-05, + "loss": 0.4598, + "step": 6610, + "teacher_loss": 0.42188167572021484 + }, + { + "compression_loss": 0.0, + "epoch": 1.19, + "label_loss": 0.3550529479980469, + "learning_rate": 2.8672835044094262e-05, + "loss": 0.2205, + "step": 6611, + "teacher_loss": 0.20557433366775513 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.5019738674163818, + "learning_rate": 2.867717218447304e-05, + "loss": 0.4294, + "step": 6612, + "teacher_loss": 0.42134717106819153 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.3378683924674988, + "learning_rate": 2.8681509324851817e-05, + "loss": 0.2179, + "step": 6613, + "teacher_loss": 0.2046101838350296 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.3226926922798157, + "learning_rate": 2.868584646523059e-05, + "loss": 0.2946, + "step": 6614, + "teacher_loss": 0.2915023863315582 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.5214836001396179, + "learning_rate": 2.869018360560937e-05, + "loss": 0.2337, + "step": 6615, + "teacher_loss": 0.20174537599086761 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 1.2825591564178467, + "learning_rate": 2.8694520745988147e-05, + "loss": 0.4112, + "step": 6616, + "teacher_loss": 0.3143288493156433 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.5607950091362, + "learning_rate": 2.8698857886366924e-05, + "loss": 0.2505, + "step": 6617, + "teacher_loss": 0.21606168150901794 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.6755258440971375, + "learning_rate": 2.87031950267457e-05, + "loss": 0.3026, + "step": 6618, + "teacher_loss": 0.2611140310764313 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.3434546887874603, + "learning_rate": 2.8707532167124476e-05, + "loss": 0.2245, + "step": 6619, + "teacher_loss": 0.21127015352249146 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.3250124454498291, + "learning_rate": 2.8711869307503254e-05, + "loss": 0.1893, + "step": 6620, + "teacher_loss": 0.1742463856935501 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.33393198251724243, + "learning_rate": 2.871620644788203e-05, + "loss": 0.2183, + "step": 6621, + "teacher_loss": 0.2055039405822754 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.35815316438674927, + "learning_rate": 2.8720543588260806e-05, + "loss": 0.2118, + "step": 6622, + "teacher_loss": 0.19549237191677094 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.47120630741119385, + "learning_rate": 2.8724880728639583e-05, + "loss": 0.2138, + "step": 6623, + "teacher_loss": 0.18520459532737732 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.30158618092536926, + "learning_rate": 2.872921786901836e-05, + "loss": 0.2376, + "step": 6624, + "teacher_loss": 0.23050986230373383 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.3577457070350647, + "learning_rate": 2.873355500939714e-05, + "loss": 0.366, + "step": 6625, + "teacher_loss": 0.36687615513801575 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.5216197967529297, + "learning_rate": 2.8737892149775916e-05, + "loss": 0.2291, + "step": 6626, + "teacher_loss": 0.1965874582529068 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.4354352355003357, + "learning_rate": 2.8742229290154694e-05, + "loss": 0.2185, + "step": 6627, + "teacher_loss": 0.19436612725257874 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.4702063202857971, + "learning_rate": 2.874656643053347e-05, + "loss": 0.2932, + "step": 6628, + "teacher_loss": 0.2735503911972046 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.398676335811615, + "learning_rate": 2.8750903570912246e-05, + "loss": 0.2501, + "step": 6629, + "teacher_loss": 0.2335902750492096 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.3202178478240967, + "learning_rate": 2.8755240711291023e-05, + "loss": 0.3217, + "step": 6630, + "teacher_loss": 0.3218112587928772 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.2990373969078064, + "learning_rate": 2.8759577851669797e-05, + "loss": 0.2986, + "step": 6631, + "teacher_loss": 0.29856571555137634 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 1.0931466817855835, + "learning_rate": 2.8763914992048575e-05, + "loss": 0.4435, + "step": 6632, + "teacher_loss": 0.3712702989578247 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.5398867726325989, + "learning_rate": 2.8768252132427353e-05, + "loss": 0.2397, + "step": 6633, + "teacher_loss": 0.20639733970165253 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.4573324918746948, + "learning_rate": 2.877258927280613e-05, + "loss": 0.2388, + "step": 6634, + "teacher_loss": 0.2145230621099472 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.28099197149276733, + "learning_rate": 2.8776926413184908e-05, + "loss": 0.2576, + "step": 6635, + "teacher_loss": 0.25499945878982544 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.3202160596847534, + "learning_rate": 2.8781263553563685e-05, + "loss": 0.3592, + "step": 6636, + "teacher_loss": 0.3635649085044861 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.26653724908828735, + "learning_rate": 2.8785600693942463e-05, + "loss": 0.1894, + "step": 6637, + "teacher_loss": 0.18082335591316223 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.3379601836204529, + "learning_rate": 2.878993783432124e-05, + "loss": 0.2285, + "step": 6638, + "teacher_loss": 0.21635481715202332 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.577174186706543, + "learning_rate": 2.879427497470002e-05, + "loss": 0.2514, + "step": 6639, + "teacher_loss": 0.21521402895450592 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.4185717701911926, + "learning_rate": 2.879861211507879e-05, + "loss": 0.2294, + "step": 6640, + "teacher_loss": 0.20843356847763062 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.6753969788551331, + "learning_rate": 2.8802949255457567e-05, + "loss": 0.2656, + "step": 6641, + "teacher_loss": 0.2200138121843338 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.7188202738761902, + "learning_rate": 2.8807286395836344e-05, + "loss": 0.4033, + "step": 6642, + "teacher_loss": 0.36827534437179565 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.49699872732162476, + "learning_rate": 2.8811623536215122e-05, + "loss": 0.3033, + "step": 6643, + "teacher_loss": 0.28176286816596985 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.45720675587654114, + "learning_rate": 2.88159606765939e-05, + "loss": 0.2886, + "step": 6644, + "teacher_loss": 0.2698439359664917 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.6596108675003052, + "learning_rate": 2.8820297816972677e-05, + "loss": 0.216, + "step": 6645, + "teacher_loss": 0.1666649580001831 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.6086090207099915, + "learning_rate": 2.8824634957351455e-05, + "loss": 0.3046, + "step": 6646, + "teacher_loss": 0.27079203724861145 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.4501751661300659, + "learning_rate": 2.8828972097730233e-05, + "loss": 0.2966, + "step": 6647, + "teacher_loss": 0.27955523133277893 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.06954265385866165, + "learning_rate": 2.883330923810901e-05, + "loss": 0.3078, + "step": 6648, + "teacher_loss": 0.33429253101348877 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.40952762961387634, + "learning_rate": 2.8837646378487784e-05, + "loss": 0.2356, + "step": 6649, + "teacher_loss": 0.216274693608284 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.6217672824859619, + "learning_rate": 2.8841983518866562e-05, + "loss": 0.2936, + "step": 6650, + "teacher_loss": 0.25718235969543457 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.5222299098968506, + "learning_rate": 2.8846320659245336e-05, + "loss": 0.3137, + "step": 6651, + "teacher_loss": 0.29052168130874634 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.3392089009284973, + "learning_rate": 2.8850657799624114e-05, + "loss": 0.2378, + "step": 6652, + "teacher_loss": 0.2265438735485077 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.687258780002594, + "learning_rate": 2.885499494000289e-05, + "loss": 0.4006, + "step": 6653, + "teacher_loss": 0.3688029646873474 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.520557165145874, + "learning_rate": 2.885933208038167e-05, + "loss": 0.2989, + "step": 6654, + "teacher_loss": 0.27431195974349976 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.29005491733551025, + "learning_rate": 2.8863669220760447e-05, + "loss": 0.2166, + "step": 6655, + "teacher_loss": 0.20847874879837036 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.256401389837265, + "learning_rate": 2.8868006361139224e-05, + "loss": 0.2782, + "step": 6656, + "teacher_loss": 0.2806296944618225 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.20678222179412842, + "learning_rate": 2.8872343501518002e-05, + "loss": 0.3778, + "step": 6657, + "teacher_loss": 0.39678436517715454 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.4107311964035034, + "learning_rate": 2.8876680641896776e-05, + "loss": 0.2398, + "step": 6658, + "teacher_loss": 0.22079822421073914 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.8440202474594116, + "learning_rate": 2.8881017782275554e-05, + "loss": 0.2901, + "step": 6659, + "teacher_loss": 0.22854480147361755 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.19675973057746887, + "learning_rate": 2.888535492265433e-05, + "loss": 0.1634, + "step": 6660, + "teacher_loss": 0.15974470973014832 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.60176020860672, + "learning_rate": 2.888969206303311e-05, + "loss": 0.3025, + "step": 6661, + "teacher_loss": 0.26924076676368713 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.5388343930244446, + "learning_rate": 2.8894029203411883e-05, + "loss": 0.2981, + "step": 6662, + "teacher_loss": 0.27132290601730347 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.23070412874221802, + "learning_rate": 2.889836634379066e-05, + "loss": 0.2998, + "step": 6663, + "teacher_loss": 0.3074338734149933 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.4498476982116699, + "learning_rate": 2.890270348416944e-05, + "loss": 0.2971, + "step": 6664, + "teacher_loss": 0.28008365631103516 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.7022789120674133, + "learning_rate": 2.8907040624548216e-05, + "loss": 0.2903, + "step": 6665, + "teacher_loss": 0.24454087018966675 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.2166617214679718, + "learning_rate": 2.891137776492699e-05, + "loss": 0.1539, + "step": 6666, + "teacher_loss": 0.1469482183456421 + }, + { + "compression_loss": 0.0, + "epoch": 1.2, + "label_loss": 0.26323434710502625, + "learning_rate": 2.8915714905305768e-05, + "loss": 0.2308, + "step": 6667, + "teacher_loss": 0.22717049717903137 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.46240225434303284, + "learning_rate": 2.8920052045684546e-05, + "loss": 0.2526, + "step": 6668, + "teacher_loss": 0.22924943268299103 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.5485644936561584, + "learning_rate": 2.8924389186063323e-05, + "loss": 0.2741, + "step": 6669, + "teacher_loss": 0.24356421828269958 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.509672999382019, + "learning_rate": 2.89287263264421e-05, + "loss": 0.2127, + "step": 6670, + "teacher_loss": 0.17970570921897888 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.17245179414749146, + "learning_rate": 2.893306346682088e-05, + "loss": 0.221, + "step": 6671, + "teacher_loss": 0.226434588432312 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.4979356527328491, + "learning_rate": 2.8937400607199653e-05, + "loss": 0.3593, + "step": 6672, + "teacher_loss": 0.34384244680404663 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.6386487483978271, + "learning_rate": 2.894173774757843e-05, + "loss": 0.389, + "step": 6673, + "teacher_loss": 0.36125892400741577 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.5501528978347778, + "learning_rate": 2.8946074887957208e-05, + "loss": 0.2757, + "step": 6674, + "teacher_loss": 0.24518704414367676 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.3335050940513611, + "learning_rate": 2.8950412028335982e-05, + "loss": 0.256, + "step": 6675, + "teacher_loss": 0.24737989902496338 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.6204159259796143, + "learning_rate": 2.895474916871476e-05, + "loss": 0.3445, + "step": 6676, + "teacher_loss": 0.31383490562438965 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.45950818061828613, + "learning_rate": 2.8959086309093537e-05, + "loss": 0.2217, + "step": 6677, + "teacher_loss": 0.19529595971107483 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.7635051608085632, + "learning_rate": 2.8963423449472315e-05, + "loss": 0.2965, + "step": 6678, + "teacher_loss": 0.24461869895458221 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.32681602239608765, + "learning_rate": 2.8967760589851093e-05, + "loss": 0.2042, + "step": 6679, + "teacher_loss": 0.19054418802261353 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.6956307888031006, + "learning_rate": 2.897209773022987e-05, + "loss": 0.2578, + "step": 6680, + "teacher_loss": 0.20915260910987854 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.5187751650810242, + "learning_rate": 2.8976434870608648e-05, + "loss": 0.2748, + "step": 6681, + "teacher_loss": 0.2476627379655838 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.19392740726470947, + "learning_rate": 2.8980772010987426e-05, + "loss": 0.166, + "step": 6682, + "teacher_loss": 0.16284844279289246 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.37802237272262573, + "learning_rate": 2.89851091513662e-05, + "loss": 0.2296, + "step": 6683, + "teacher_loss": 0.21308761835098267 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.7346153259277344, + "learning_rate": 2.8989446291744974e-05, + "loss": 0.2987, + "step": 6684, + "teacher_loss": 0.25031134486198425 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.3306788504123688, + "learning_rate": 2.899378343212375e-05, + "loss": 0.1675, + "step": 6685, + "teacher_loss": 0.14933837950229645 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.11020047217607498, + "learning_rate": 2.899812057250253e-05, + "loss": 0.1711, + "step": 6686, + "teacher_loss": 0.1778886914253235 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.21904563903808594, + "learning_rate": 2.9002457712881307e-05, + "loss": 0.2115, + "step": 6687, + "teacher_loss": 0.2106417715549469 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.7435877323150635, + "learning_rate": 2.9006794853260085e-05, + "loss": 0.2854, + "step": 6688, + "teacher_loss": 0.23450762033462524 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.31456121802330017, + "learning_rate": 2.9011131993638862e-05, + "loss": 0.2086, + "step": 6689, + "teacher_loss": 0.1968565583229065 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.8701610565185547, + "learning_rate": 2.901546913401764e-05, + "loss": 0.3369, + "step": 6690, + "teacher_loss": 0.27767497301101685 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.5021225214004517, + "learning_rate": 2.9019806274396417e-05, + "loss": 0.6658, + "step": 6691, + "teacher_loss": 0.6840010285377502 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.7837206721305847, + "learning_rate": 2.9024143414775195e-05, + "loss": 0.3258, + "step": 6692, + "teacher_loss": 0.2749195098876953 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.18329495191574097, + "learning_rate": 2.902848055515397e-05, + "loss": 0.1873, + "step": 6693, + "teacher_loss": 0.18776541948318481 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.314161479473114, + "learning_rate": 2.9032817695532744e-05, + "loss": 0.2779, + "step": 6694, + "teacher_loss": 0.2738340497016907 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.4953298568725586, + "learning_rate": 2.903715483591152e-05, + "loss": 0.2429, + "step": 6695, + "teacher_loss": 0.2148183435201645 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.3808950185775757, + "learning_rate": 2.90414919762903e-05, + "loss": 0.3526, + "step": 6696, + "teacher_loss": 0.3495006561279297 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.13821841776371002, + "learning_rate": 2.9045829116669076e-05, + "loss": 0.1962, + "step": 6697, + "teacher_loss": 0.20265763998031616 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.3015595078468323, + "learning_rate": 2.9050166257047854e-05, + "loss": 0.2276, + "step": 6698, + "teacher_loss": 0.21937115490436554 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.404385507106781, + "learning_rate": 2.905450339742663e-05, + "loss": 0.2427, + "step": 6699, + "teacher_loss": 0.2246958315372467 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.6914939284324646, + "learning_rate": 2.905884053780541e-05, + "loss": 0.3853, + "step": 6700, + "teacher_loss": 0.3512588143348694 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.376018762588501, + "learning_rate": 2.9063177678184187e-05, + "loss": 0.3181, + "step": 6701, + "teacher_loss": 0.3116256594657898 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.5555792450904846, + "learning_rate": 2.906751481856296e-05, + "loss": 0.2687, + "step": 6702, + "teacher_loss": 0.2368665337562561 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.2588590681552887, + "learning_rate": 2.907185195894174e-05, + "loss": 0.2589, + "step": 6703, + "teacher_loss": 0.25895532965660095 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.6085354685783386, + "learning_rate": 2.9076189099320516e-05, + "loss": 0.2443, + "step": 6704, + "teacher_loss": 0.20385757088661194 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.49690741300582886, + "learning_rate": 2.908052623969929e-05, + "loss": 0.2494, + "step": 6705, + "teacher_loss": 0.22194766998291016 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.2839939594268799, + "learning_rate": 2.9084863380078068e-05, + "loss": 0.3178, + "step": 6706, + "teacher_loss": 0.32156872749328613 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.29617321491241455, + "learning_rate": 2.9089200520456846e-05, + "loss": 0.2386, + "step": 6707, + "teacher_loss": 0.23216180503368378 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.6152980327606201, + "learning_rate": 2.9093537660835623e-05, + "loss": 0.3247, + "step": 6708, + "teacher_loss": 0.2924190163612366 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.2449013739824295, + "learning_rate": 2.90978748012144e-05, + "loss": 0.1819, + "step": 6709, + "teacher_loss": 0.174947589635849 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 1.5153051614761353, + "learning_rate": 2.9102211941593175e-05, + "loss": 0.7138, + "step": 6710, + "teacher_loss": 0.624751091003418 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.5430542230606079, + "learning_rate": 2.9106549081971953e-05, + "loss": 0.2183, + "step": 6711, + "teacher_loss": 0.18225625157356262 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.613983154296875, + "learning_rate": 2.911088622235073e-05, + "loss": 0.2474, + "step": 6712, + "teacher_loss": 0.2066924273967743 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.4491587281227112, + "learning_rate": 2.9115223362729508e-05, + "loss": 0.4486, + "step": 6713, + "teacher_loss": 0.448542058467865 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.2147151529788971, + "learning_rate": 2.9119560503108286e-05, + "loss": 0.2123, + "step": 6714, + "teacher_loss": 0.21206744015216827 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.3600481152534485, + "learning_rate": 2.9123897643487063e-05, + "loss": 0.2656, + "step": 6715, + "teacher_loss": 0.25508952140808105 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.26259976625442505, + "learning_rate": 2.9128234783865838e-05, + "loss": 0.2581, + "step": 6716, + "teacher_loss": 0.25764578580856323 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.2889711856842041, + "learning_rate": 2.9132571924244615e-05, + "loss": 0.3403, + "step": 6717, + "teacher_loss": 0.3460536003112793 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.5844759345054626, + "learning_rate": 2.9136909064623393e-05, + "loss": 0.3128, + "step": 6718, + "teacher_loss": 0.28263533115386963 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.35446763038635254, + "learning_rate": 2.9141246205002167e-05, + "loss": 0.2943, + "step": 6719, + "teacher_loss": 0.2875811457633972 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.19606265425682068, + "learning_rate": 2.9145583345380945e-05, + "loss": 0.3495, + "step": 6720, + "teacher_loss": 0.36655569076538086 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.4621555805206299, + "learning_rate": 2.9149920485759722e-05, + "loss": 0.3256, + "step": 6721, + "teacher_loss": 0.3104441165924072 + }, + { + "compression_loss": 0.0, + "epoch": 1.21, + "label_loss": 0.23333001136779785, + "learning_rate": 2.91542576261385e-05, + "loss": 0.2579, + "step": 6722, + "teacher_loss": 0.2606227993965149 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.39802294969558716, + "learning_rate": 2.9158594766517278e-05, + "loss": 0.2167, + "step": 6723, + "teacher_loss": 0.19659563899040222 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.4610007405281067, + "learning_rate": 2.9162931906896055e-05, + "loss": 0.2659, + "step": 6724, + "teacher_loss": 0.24419671297073364 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.2381897270679474, + "learning_rate": 2.9167269047274833e-05, + "loss": 0.1551, + "step": 6725, + "teacher_loss": 0.14586922526359558 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.22514736652374268, + "learning_rate": 2.917160618765361e-05, + "loss": 0.2137, + "step": 6726, + "teacher_loss": 0.21245279908180237 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.2898510694503784, + "learning_rate": 2.9175943328032385e-05, + "loss": 0.2542, + "step": 6727, + "teacher_loss": 0.2502511739730835 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.8047339916229248, + "learning_rate": 2.918028046841116e-05, + "loss": 0.5926, + "step": 6728, + "teacher_loss": 0.5690531730651855 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.45106422901153564, + "learning_rate": 2.9184617608789937e-05, + "loss": 0.1948, + "step": 6729, + "teacher_loss": 0.16636264324188232 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.529430091381073, + "learning_rate": 2.9188954749168714e-05, + "loss": 0.2796, + "step": 6730, + "teacher_loss": 0.2518298625946045 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.42361733317375183, + "learning_rate": 2.9193291889547492e-05, + "loss": 0.3716, + "step": 6731, + "teacher_loss": 0.36581915616989136 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.49601200222969055, + "learning_rate": 2.919762902992627e-05, + "loss": 0.1831, + "step": 6732, + "teacher_loss": 0.14833585917949677 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.36111313104629517, + "learning_rate": 2.9201966170305047e-05, + "loss": 0.2332, + "step": 6733, + "teacher_loss": 0.2189566195011139 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.4475724697113037, + "learning_rate": 2.9206303310683825e-05, + "loss": 0.4658, + "step": 6734, + "teacher_loss": 0.4678168296813965 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.8518809080123901, + "learning_rate": 2.9210640451062602e-05, + "loss": 0.319, + "step": 6735, + "teacher_loss": 0.2597949206829071 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.17262616753578186, + "learning_rate": 2.921497759144138e-05, + "loss": 0.1578, + "step": 6736, + "teacher_loss": 0.15610334277153015 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.5860235691070557, + "learning_rate": 2.9219314731820154e-05, + "loss": 0.344, + "step": 6737, + "teacher_loss": 0.31710493564605713 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.4595158100128174, + "learning_rate": 2.922365187219893e-05, + "loss": 0.2374, + "step": 6738, + "teacher_loss": 0.21273310482501984 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.28825536370277405, + "learning_rate": 2.9227989012577706e-05, + "loss": 0.1638, + "step": 6739, + "teacher_loss": 0.14993247389793396 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.44624924659729004, + "learning_rate": 2.9232326152956484e-05, + "loss": 0.2774, + "step": 6740, + "teacher_loss": 0.2586672306060791 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.8333386182785034, + "learning_rate": 2.923666329333526e-05, + "loss": 0.3883, + "step": 6741, + "teacher_loss": 0.3388686776161194 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.20976148545742035, + "learning_rate": 2.924100043371404e-05, + "loss": 0.2282, + "step": 6742, + "teacher_loss": 0.23023319244384766 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.28107914328575134, + "learning_rate": 2.9245337574092816e-05, + "loss": 0.1964, + "step": 6743, + "teacher_loss": 0.18703222274780273 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.5735495090484619, + "learning_rate": 2.9249674714471594e-05, + "loss": 0.24, + "step": 6744, + "teacher_loss": 0.20295582711696625 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.5373222827911377, + "learning_rate": 2.9254011854850372e-05, + "loss": 0.2017, + "step": 6745, + "teacher_loss": 0.16438668966293335 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.41825371980667114, + "learning_rate": 2.9258348995229146e-05, + "loss": 0.3412, + "step": 6746, + "teacher_loss": 0.33268415927886963 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.4269944429397583, + "learning_rate": 2.9262686135607924e-05, + "loss": 0.1984, + "step": 6747, + "teacher_loss": 0.17300693690776825 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.5833960175514221, + "learning_rate": 2.92670232759867e-05, + "loss": 0.3967, + "step": 6748, + "teacher_loss": 0.37595587968826294 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.23058518767356873, + "learning_rate": 2.9271360416365475e-05, + "loss": 0.1651, + "step": 6749, + "teacher_loss": 0.15784680843353271 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.47358250617980957, + "learning_rate": 2.9275697556744253e-05, + "loss": 0.2516, + "step": 6750, + "teacher_loss": 0.22691065073013306 + }, + { + "epoch": 1.22, + "eval_exact_match": 79.47019867549669, + "eval_f1": 87.08673632005052, + "step": 6750 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.14281851053237915, + "learning_rate": 2.928003469712303e-05, + "loss": 0.1492, + "step": 6751, + "teacher_loss": 0.149949312210083 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.5668355822563171, + "learning_rate": 2.928437183750181e-05, + "loss": 0.3003, + "step": 6752, + "teacher_loss": 0.27065181732177734 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.26105135679244995, + "learning_rate": 2.9288708977880586e-05, + "loss": 0.1884, + "step": 6753, + "teacher_loss": 0.18028172850608826 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.31634920835494995, + "learning_rate": 2.929304611825936e-05, + "loss": 0.2223, + "step": 6754, + "teacher_loss": 0.2118682563304901 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.36808857321739197, + "learning_rate": 2.9297383258638138e-05, + "loss": 0.1891, + "step": 6755, + "teacher_loss": 0.16924867033958435 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.6843034029006958, + "learning_rate": 2.9301720399016915e-05, + "loss": 0.4061, + "step": 6756, + "teacher_loss": 0.37513428926467896 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.251117080450058, + "learning_rate": 2.9306057539395693e-05, + "loss": 0.2274, + "step": 6757, + "teacher_loss": 0.22479116916656494 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.7825935482978821, + "learning_rate": 2.931039467977447e-05, + "loss": 0.322, + "step": 6758, + "teacher_loss": 0.2708261013031006 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.32662642002105713, + "learning_rate": 2.9314731820153248e-05, + "loss": 0.1781, + "step": 6759, + "teacher_loss": 0.1616249829530716 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.4021502137184143, + "learning_rate": 2.9319068960532023e-05, + "loss": 0.3657, + "step": 6760, + "teacher_loss": 0.3616999387741089 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.3031701445579529, + "learning_rate": 2.93234061009108e-05, + "loss": 0.2345, + "step": 6761, + "teacher_loss": 0.2269253134727478 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.36989375948905945, + "learning_rate": 2.9327743241289578e-05, + "loss": 0.2905, + "step": 6762, + "teacher_loss": 0.2817283272743225 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.18691489100456238, + "learning_rate": 2.9332080381668352e-05, + "loss": 0.2453, + "step": 6763, + "teacher_loss": 0.2517717480659485 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.8611816167831421, + "learning_rate": 2.933641752204713e-05, + "loss": 0.5945, + "step": 6764, + "teacher_loss": 0.5648839473724365 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.3448590040206909, + "learning_rate": 2.9340754662425907e-05, + "loss": 0.2392, + "step": 6765, + "teacher_loss": 0.22744786739349365 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.5437180995941162, + "learning_rate": 2.9345091802804685e-05, + "loss": 0.4036, + "step": 6766, + "teacher_loss": 0.3880671262741089 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.6874891519546509, + "learning_rate": 2.9349428943183462e-05, + "loss": 0.3058, + "step": 6767, + "teacher_loss": 0.26336002349853516 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.5390881299972534, + "learning_rate": 2.935376608356224e-05, + "loss": 0.5098, + "step": 6768, + "teacher_loss": 0.5065224766731262 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.1827385425567627, + "learning_rate": 2.9358103223941018e-05, + "loss": 0.2088, + "step": 6769, + "teacher_loss": 0.2116827368736267 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.46948230266571045, + "learning_rate": 2.9362440364319792e-05, + "loss": 0.2793, + "step": 6770, + "teacher_loss": 0.2581137418746948 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.23580576479434967, + "learning_rate": 2.936677750469857e-05, + "loss": 0.187, + "step": 6771, + "teacher_loss": 0.181581050157547 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.7922438383102417, + "learning_rate": 2.9371114645077344e-05, + "loss": 0.2944, + "step": 6772, + "teacher_loss": 0.23913924396038055 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.3974483013153076, + "learning_rate": 2.937545178545612e-05, + "loss": 0.2784, + "step": 6773, + "teacher_loss": 0.2651284337043762 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.4929448366165161, + "learning_rate": 2.93797889258349e-05, + "loss": 0.2425, + "step": 6774, + "teacher_loss": 0.21462717652320862 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.5806875824928284, + "learning_rate": 2.9384126066213677e-05, + "loss": 0.3026, + "step": 6775, + "teacher_loss": 0.27164673805236816 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.2874259948730469, + "learning_rate": 2.9388463206592454e-05, + "loss": 0.2783, + "step": 6776, + "teacher_loss": 0.2772985100746155 + }, + { + "compression_loss": 0.0, + "epoch": 1.22, + "label_loss": 0.5340349674224854, + "learning_rate": 2.9392800346971232e-05, + "loss": 0.2609, + "step": 6777, + "teacher_loss": 0.2305103838443756 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.7590471506118774, + "learning_rate": 2.939713748735001e-05, + "loss": 0.2973, + "step": 6778, + "teacher_loss": 0.24595658481121063 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.4682631492614746, + "learning_rate": 2.9401474627728787e-05, + "loss": 0.2181, + "step": 6779, + "teacher_loss": 0.1903439164161682 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.24390703439712524, + "learning_rate": 2.9405811768107565e-05, + "loss": 0.2025, + "step": 6780, + "teacher_loss": 0.19793207943439484 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.6501028537750244, + "learning_rate": 2.9410148908486336e-05, + "loss": 0.3255, + "step": 6781, + "teacher_loss": 0.28944146633148193 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.20671355724334717, + "learning_rate": 2.9414486048865113e-05, + "loss": 0.2272, + "step": 6782, + "teacher_loss": 0.22946389019489288 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.462342232465744, + "learning_rate": 2.941882318924389e-05, + "loss": 0.281, + "step": 6783, + "teacher_loss": 0.26079607009887695 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.6197547912597656, + "learning_rate": 2.942316032962267e-05, + "loss": 0.3039, + "step": 6784, + "teacher_loss": 0.2688131034374237 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.46017757058143616, + "learning_rate": 2.9427497470001446e-05, + "loss": 0.3332, + "step": 6785, + "teacher_loss": 0.31913435459136963 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.49768027663230896, + "learning_rate": 2.9431834610380224e-05, + "loss": 0.2101, + "step": 6786, + "teacher_loss": 0.17813362181186676 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.5406091213226318, + "learning_rate": 2.9436171750759e-05, + "loss": 0.3867, + "step": 6787, + "teacher_loss": 0.36960452795028687 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.3446793258190155, + "learning_rate": 2.944050889113778e-05, + "loss": 0.268, + "step": 6788, + "teacher_loss": 0.25949758291244507 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.6057062149047852, + "learning_rate": 2.9444846031516557e-05, + "loss": 0.3534, + "step": 6789, + "teacher_loss": 0.32541871070861816 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.3547956347465515, + "learning_rate": 2.944918317189533e-05, + "loss": 0.3472, + "step": 6790, + "teacher_loss": 0.3463127613067627 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.12161026149988174, + "learning_rate": 2.945352031227411e-05, + "loss": 0.225, + "step": 6791, + "teacher_loss": 0.2365073263645172 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.7441861033439636, + "learning_rate": 2.9457857452652883e-05, + "loss": 0.2944, + "step": 6792, + "teacher_loss": 0.24440601468086243 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.30074024200439453, + "learning_rate": 2.946219459303166e-05, + "loss": 0.2229, + "step": 6793, + "teacher_loss": 0.21424353122711182 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.8027220368385315, + "learning_rate": 2.9466531733410438e-05, + "loss": 0.29, + "step": 6794, + "teacher_loss": 0.2330566644668579 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.8364348411560059, + "learning_rate": 2.9470868873789216e-05, + "loss": 0.3632, + "step": 6795, + "teacher_loss": 0.3106558322906494 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.28005072474479675, + "learning_rate": 2.9475206014167993e-05, + "loss": 0.2533, + "step": 6796, + "teacher_loss": 0.2503625154495239 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.21674242615699768, + "learning_rate": 2.947954315454677e-05, + "loss": 0.1867, + "step": 6797, + "teacher_loss": 0.18339872360229492 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.26481059193611145, + "learning_rate": 2.948388029492555e-05, + "loss": 0.2692, + "step": 6798, + "teacher_loss": 0.269730806350708 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.5599538087844849, + "learning_rate": 2.9488217435304323e-05, + "loss": 0.2251, + "step": 6799, + "teacher_loss": 0.1878460943698883 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.7688077688217163, + "learning_rate": 2.94925545756831e-05, + "loss": 0.3846, + "step": 6800, + "teacher_loss": 0.34188467264175415 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.5688630938529968, + "learning_rate": 2.9496891716061878e-05, + "loss": 0.3011, + "step": 6801, + "teacher_loss": 0.27129557728767395 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.5558436512947083, + "learning_rate": 2.9501228856440656e-05, + "loss": 0.256, + "step": 6802, + "teacher_loss": 0.2227122187614441 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.39539194107055664, + "learning_rate": 2.950556599681943e-05, + "loss": 0.2597, + "step": 6803, + "teacher_loss": 0.2446136474609375 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.2846057415008545, + "learning_rate": 2.9509903137198207e-05, + "loss": 0.2233, + "step": 6804, + "teacher_loss": 0.21647527813911438 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.6721909046173096, + "learning_rate": 2.9514240277576985e-05, + "loss": 0.3141, + "step": 6805, + "teacher_loss": 0.2742575705051422 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.3298438489437103, + "learning_rate": 2.9518577417955763e-05, + "loss": 0.253, + "step": 6806, + "teacher_loss": 0.24450691044330597 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.5269618630409241, + "learning_rate": 2.9522914558334537e-05, + "loss": 0.1944, + "step": 6807, + "teacher_loss": 0.15742658078670502 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.9994422197341919, + "learning_rate": 2.9527251698713314e-05, + "loss": 0.3766, + "step": 6808, + "teacher_loss": 0.30739444494247437 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.44173574447631836, + "learning_rate": 2.9531588839092092e-05, + "loss": 0.2821, + "step": 6809, + "teacher_loss": 0.26436513662338257 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 1.069823980331421, + "learning_rate": 2.953592597947087e-05, + "loss": 0.4399, + "step": 6810, + "teacher_loss": 0.36993080377578735 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.3338903486728668, + "learning_rate": 2.9540263119849647e-05, + "loss": 0.1966, + "step": 6811, + "teacher_loss": 0.1812984198331833 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.18092194199562073, + "learning_rate": 2.9544600260228425e-05, + "loss": 0.1459, + "step": 6812, + "teacher_loss": 0.14199167490005493 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.4730415344238281, + "learning_rate": 2.9548937400607203e-05, + "loss": 0.2613, + "step": 6813, + "teacher_loss": 0.2377467155456543 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.6410003304481506, + "learning_rate": 2.9553274540985977e-05, + "loss": 0.2682, + "step": 6814, + "teacher_loss": 0.22678926587104797 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.674156904220581, + "learning_rate": 2.9557611681364754e-05, + "loss": 0.3221, + "step": 6815, + "teacher_loss": 0.282967209815979 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.304797887802124, + "learning_rate": 2.956194882174353e-05, + "loss": 0.2537, + "step": 6816, + "teacher_loss": 0.24801921844482422 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.3887847661972046, + "learning_rate": 2.9566285962122306e-05, + "loss": 0.3147, + "step": 6817, + "teacher_loss": 0.3065136671066284 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.7741252779960632, + "learning_rate": 2.9570623102501084e-05, + "loss": 0.4325, + "step": 6818, + "teacher_loss": 0.3945150077342987 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.6991695165634155, + "learning_rate": 2.957496024287986e-05, + "loss": 0.3367, + "step": 6819, + "teacher_loss": 0.2964194715023041 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.6699053645133972, + "learning_rate": 2.957929738325864e-05, + "loss": 0.3404, + "step": 6820, + "teacher_loss": 0.3037913143634796 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.8536896705627441, + "learning_rate": 2.9583634523637417e-05, + "loss": 0.5411, + "step": 6821, + "teacher_loss": 0.5063959360122681 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.21807242929935455, + "learning_rate": 2.9587971664016194e-05, + "loss": 0.237, + "step": 6822, + "teacher_loss": 0.23915085196495056 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.26867300271987915, + "learning_rate": 2.9592308804394972e-05, + "loss": 0.2159, + "step": 6823, + "teacher_loss": 0.2100232094526291 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.23638300597667694, + "learning_rate": 2.959664594477375e-05, + "loss": 0.1484, + "step": 6824, + "teacher_loss": 0.13858887553215027 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.27739402651786804, + "learning_rate": 2.960098308515252e-05, + "loss": 0.1937, + "step": 6825, + "teacher_loss": 0.1844204068183899 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.5223261117935181, + "learning_rate": 2.9605320225531298e-05, + "loss": 0.3899, + "step": 6826, + "teacher_loss": 0.37519919872283936 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.2585000991821289, + "learning_rate": 2.9609657365910076e-05, + "loss": 0.1791, + "step": 6827, + "teacher_loss": 0.17022843658924103 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.22532272338867188, + "learning_rate": 2.9613994506288853e-05, + "loss": 0.313, + "step": 6828, + "teacher_loss": 0.32278066873550415 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.20142707228660583, + "learning_rate": 2.961833164666763e-05, + "loss": 0.1553, + "step": 6829, + "teacher_loss": 0.15014129877090454 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.26881927251815796, + "learning_rate": 2.962266878704641e-05, + "loss": 0.2514, + "step": 6830, + "teacher_loss": 0.24944570660591125 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.5249983072280884, + "learning_rate": 2.9627005927425186e-05, + "loss": 0.2256, + "step": 6831, + "teacher_loss": 0.19232457876205444 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.31515008211135864, + "learning_rate": 2.9631343067803964e-05, + "loss": 0.2086, + "step": 6832, + "teacher_loss": 0.1967654526233673 + }, + { + "compression_loss": 0.0, + "epoch": 1.23, + "label_loss": 0.38841694593429565, + "learning_rate": 2.963568020818274e-05, + "loss": 0.4299, + "step": 6833, + "teacher_loss": 0.4345610737800598 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.43253010511398315, + "learning_rate": 2.9640017348561516e-05, + "loss": 0.234, + "step": 6834, + "teacher_loss": 0.21198254823684692 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.6204953193664551, + "learning_rate": 2.9644354488940293e-05, + "loss": 0.3299, + "step": 6835, + "teacher_loss": 0.2976353168487549 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 1.1702687740325928, + "learning_rate": 2.9648691629319068e-05, + "loss": 0.6191, + "step": 6836, + "teacher_loss": 0.5578266382217407 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.4147545397281647, + "learning_rate": 2.9653028769697845e-05, + "loss": 0.2885, + "step": 6837, + "teacher_loss": 0.2744476795196533 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.6010729074478149, + "learning_rate": 2.9657365910076623e-05, + "loss": 0.3336, + "step": 6838, + "teacher_loss": 0.30388563871383667 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.19058087468147278, + "learning_rate": 2.96617030504554e-05, + "loss": 0.1857, + "step": 6839, + "teacher_loss": 0.1851271688938141 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.5324939489364624, + "learning_rate": 2.9666040190834178e-05, + "loss": 0.2746, + "step": 6840, + "teacher_loss": 0.2458955943584442 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.2717173397541046, + "learning_rate": 2.9670377331212956e-05, + "loss": 0.1946, + "step": 6841, + "teacher_loss": 0.18598389625549316 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.45083948969841003, + "learning_rate": 2.9674714471591733e-05, + "loss": 0.2737, + "step": 6842, + "teacher_loss": 0.2539963126182556 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.5311942100524902, + "learning_rate": 2.9679051611970508e-05, + "loss": 0.2673, + "step": 6843, + "teacher_loss": 0.23803116381168365 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.28006845712661743, + "learning_rate": 2.9683388752349285e-05, + "loss": 0.1979, + "step": 6844, + "teacher_loss": 0.18875573575496674 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.5967820882797241, + "learning_rate": 2.9687725892728063e-05, + "loss": 0.4121, + "step": 6845, + "teacher_loss": 0.39161211252212524 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.45988965034484863, + "learning_rate": 2.969206303310684e-05, + "loss": 0.2334, + "step": 6846, + "teacher_loss": 0.2082492858171463 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.18419361114501953, + "learning_rate": 2.9696400173485615e-05, + "loss": 0.1609, + "step": 6847, + "teacher_loss": 0.15836480259895325 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.18904048204421997, + "learning_rate": 2.9700737313864392e-05, + "loss": 0.1908, + "step": 6848, + "teacher_loss": 0.19102071225643158 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.4729083776473999, + "learning_rate": 2.970507445424317e-05, + "loss": 0.3216, + "step": 6849, + "teacher_loss": 0.30473610758781433 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.39026209712028503, + "learning_rate": 2.9709411594621948e-05, + "loss": 0.269, + "step": 6850, + "teacher_loss": 0.2554818391799927 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.790266752243042, + "learning_rate": 2.9713748735000722e-05, + "loss": 0.3625, + "step": 6851, + "teacher_loss": 0.3149150013923645 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.3125373125076294, + "learning_rate": 2.97180858753795e-05, + "loss": 0.2478, + "step": 6852, + "teacher_loss": 0.24058011174201965 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.5254619121551514, + "learning_rate": 2.9722423015758277e-05, + "loss": 0.2782, + "step": 6853, + "teacher_loss": 0.2506744861602783 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.819421648979187, + "learning_rate": 2.9726760156137055e-05, + "loss": 0.3386, + "step": 6854, + "teacher_loss": 0.28517329692840576 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.34773337841033936, + "learning_rate": 2.9731097296515832e-05, + "loss": 0.3924, + "step": 6855, + "teacher_loss": 0.397353857755661 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.34012508392333984, + "learning_rate": 2.973543443689461e-05, + "loss": 0.2446, + "step": 6856, + "teacher_loss": 0.23400932550430298 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.6877815127372742, + "learning_rate": 2.9739771577273387e-05, + "loss": 0.482, + "step": 6857, + "teacher_loss": 0.45912912487983704 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.5139047503471375, + "learning_rate": 2.9744108717652162e-05, + "loss": 0.3612, + "step": 6858, + "teacher_loss": 0.34422624111175537 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.37956053018569946, + "learning_rate": 2.974844585803094e-05, + "loss": 0.3155, + "step": 6859, + "teacher_loss": 0.30836912989616394 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.48840436339378357, + "learning_rate": 2.9752782998409714e-05, + "loss": 0.3013, + "step": 6860, + "teacher_loss": 0.2804993987083435 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.43081966042518616, + "learning_rate": 2.975712013878849e-05, + "loss": 0.227, + "step": 6861, + "teacher_loss": 0.20438753068447113 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.48362863063812256, + "learning_rate": 2.976145727916727e-05, + "loss": 0.2828, + "step": 6862, + "teacher_loss": 0.26046252250671387 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.2711610794067383, + "learning_rate": 2.9765794419546046e-05, + "loss": 0.2627, + "step": 6863, + "teacher_loss": 0.26176947355270386 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.5587612390518188, + "learning_rate": 2.9770131559924824e-05, + "loss": 0.2485, + "step": 6864, + "teacher_loss": 0.2140185534954071 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.36521023511886597, + "learning_rate": 2.97744687003036e-05, + "loss": 0.2951, + "step": 6865, + "teacher_loss": 0.2873638868331909 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.3985061049461365, + "learning_rate": 2.977880584068238e-05, + "loss": 0.2811, + "step": 6866, + "teacher_loss": 0.2680205702781677 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.2042558789253235, + "learning_rate": 2.9783142981061157e-05, + "loss": 0.2335, + "step": 6867, + "teacher_loss": 0.23671205341815948 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.48562318086624146, + "learning_rate": 2.978748012143993e-05, + "loss": 0.25, + "step": 6868, + "teacher_loss": 0.22379852831363678 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.38065552711486816, + "learning_rate": 2.9791817261818705e-05, + "loss": 0.3019, + "step": 6869, + "teacher_loss": 0.2931460738182068 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.49717220664024353, + "learning_rate": 2.9796154402197483e-05, + "loss": 0.2187, + "step": 6870, + "teacher_loss": 0.18780797719955444 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.2969147562980652, + "learning_rate": 2.980049154257626e-05, + "loss": 0.2706, + "step": 6871, + "teacher_loss": 0.26768600940704346 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.7951686382293701, + "learning_rate": 2.9804828682955038e-05, + "loss": 0.3124, + "step": 6872, + "teacher_loss": 0.2588126063346863 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.6118711233139038, + "learning_rate": 2.9809165823333816e-05, + "loss": 0.2345, + "step": 6873, + "teacher_loss": 0.19254979491233826 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.4556140899658203, + "learning_rate": 2.9813502963712594e-05, + "loss": 0.3576, + "step": 6874, + "teacher_loss": 0.3467370271682739 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.5523602962493896, + "learning_rate": 2.981784010409137e-05, + "loss": 0.2713, + "step": 6875, + "teacher_loss": 0.24008990824222565 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.21420562267303467, + "learning_rate": 2.982217724447015e-05, + "loss": 0.1738, + "step": 6876, + "teacher_loss": 0.1693510115146637 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.3408447206020355, + "learning_rate": 2.9826514384848926e-05, + "loss": 0.2987, + "step": 6877, + "teacher_loss": 0.2940502166748047 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.4900956153869629, + "learning_rate": 2.98308515252277e-05, + "loss": 0.237, + "step": 6878, + "teacher_loss": 0.20887476205825806 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.6433770656585693, + "learning_rate": 2.9835188665606475e-05, + "loss": 0.3834, + "step": 6879, + "teacher_loss": 0.35452088713645935 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.5121951699256897, + "learning_rate": 2.9839525805985252e-05, + "loss": 0.2383, + "step": 6880, + "teacher_loss": 0.20788998901844025 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.25369513034820557, + "learning_rate": 2.984386294636403e-05, + "loss": 0.3165, + "step": 6881, + "teacher_loss": 0.32352787256240845 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.2857291102409363, + "learning_rate": 2.9848200086742808e-05, + "loss": 0.1954, + "step": 6882, + "teacher_loss": 0.18533584475517273 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.666456401348114, + "learning_rate": 2.9852537227121585e-05, + "loss": 0.2391, + "step": 6883, + "teacher_loss": 0.19161485135555267 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.2401951253414154, + "learning_rate": 2.9856874367500363e-05, + "loss": 0.2017, + "step": 6884, + "teacher_loss": 0.19739894568920135 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.23822197318077087, + "learning_rate": 2.986121150787914e-05, + "loss": 0.2036, + "step": 6885, + "teacher_loss": 0.19979208707809448 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.43056944012641907, + "learning_rate": 2.9865548648257918e-05, + "loss": 0.2553, + "step": 6886, + "teacher_loss": 0.23586609959602356 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.2658616602420807, + "learning_rate": 2.9869885788636692e-05, + "loss": 0.1962, + "step": 6887, + "teacher_loss": 0.18841680884361267 + }, + { + "compression_loss": 0.0, + "epoch": 1.24, + "label_loss": 0.9721401333808899, + "learning_rate": 2.987422292901547e-05, + "loss": 0.5472, + "step": 6888, + "teacher_loss": 0.5000076293945312 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.4527629613876343, + "learning_rate": 2.9878560069394248e-05, + "loss": 0.346, + "step": 6889, + "teacher_loss": 0.3341861367225647 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.3134571313858032, + "learning_rate": 2.9882897209773022e-05, + "loss": 0.2373, + "step": 6890, + "teacher_loss": 0.2288718819618225 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.26660627126693726, + "learning_rate": 2.98872343501518e-05, + "loss": 0.2379, + "step": 6891, + "teacher_loss": 0.23465800285339355 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.4978344440460205, + "learning_rate": 2.9891571490530577e-05, + "loss": 0.3412, + "step": 6892, + "teacher_loss": 0.3237534165382385 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.1745733916759491, + "learning_rate": 2.9895908630909355e-05, + "loss": 0.1842, + "step": 6893, + "teacher_loss": 0.1852794885635376 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.5690048336982727, + "learning_rate": 2.9900245771288132e-05, + "loss": 0.3396, + "step": 6894, + "teacher_loss": 0.31414559483528137 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.6939668655395508, + "learning_rate": 2.9904582911666907e-05, + "loss": 0.6192, + "step": 6895, + "teacher_loss": 0.6108812093734741 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.2658732533454895, + "learning_rate": 2.9908920052045684e-05, + "loss": 0.2071, + "step": 6896, + "teacher_loss": 0.2005537748336792 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.31617599725723267, + "learning_rate": 2.9913257192424462e-05, + "loss": 0.2143, + "step": 6897, + "teacher_loss": 0.20297953486442566 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.44130343198776245, + "learning_rate": 2.991759433280324e-05, + "loss": 0.1942, + "step": 6898, + "teacher_loss": 0.16669508814811707 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.320975661277771, + "learning_rate": 2.9921931473182017e-05, + "loss": 0.1906, + "step": 6899, + "teacher_loss": 0.176153302192688 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.19834718108177185, + "learning_rate": 2.9926268613560795e-05, + "loss": 0.2705, + "step": 6900, + "teacher_loss": 0.2785126566886902 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.3560863733291626, + "learning_rate": 2.993060575393957e-05, + "loss": 0.226, + "step": 6901, + "teacher_loss": 0.21155351400375366 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 1.0060230493545532, + "learning_rate": 2.9934942894318347e-05, + "loss": 0.4203, + "step": 6902, + "teacher_loss": 0.355190634727478 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.3327946066856384, + "learning_rate": 2.9939280034697124e-05, + "loss": 0.2583, + "step": 6903, + "teacher_loss": 0.24997329711914062 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.1829233467578888, + "learning_rate": 2.99436171750759e-05, + "loss": 0.2206, + "step": 6904, + "teacher_loss": 0.22479453682899475 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.25130724906921387, + "learning_rate": 2.9947954315454676e-05, + "loss": 0.2324, + "step": 6905, + "teacher_loss": 0.23026320338249207 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.5618460178375244, + "learning_rate": 2.9952291455833454e-05, + "loss": 0.2513, + "step": 6906, + "teacher_loss": 0.2168234884738922 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.3922111392021179, + "learning_rate": 2.995662859621223e-05, + "loss": 0.2009, + "step": 6907, + "teacher_loss": 0.17964893579483032 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.6414545774459839, + "learning_rate": 2.996096573659101e-05, + "loss": 0.3158, + "step": 6908, + "teacher_loss": 0.2796213626861572 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 1.6081366539001465, + "learning_rate": 2.9965302876969787e-05, + "loss": 0.4216, + "step": 6909, + "teacher_loss": 0.2897984981536865 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.5534387230873108, + "learning_rate": 2.9969640017348564e-05, + "loss": 0.3425, + "step": 6910, + "teacher_loss": 0.3190078139305115 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.581301748752594, + "learning_rate": 2.9973977157727342e-05, + "loss": 0.2854, + "step": 6911, + "teacher_loss": 0.25256484746932983 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.6544755697250366, + "learning_rate": 2.9978314298106116e-05, + "loss": 0.3177, + "step": 6912, + "teacher_loss": 0.28024822473526 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.43475785851478577, + "learning_rate": 2.998265143848489e-05, + "loss": 0.2988, + "step": 6913, + "teacher_loss": 0.28374341130256653 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.9851709604263306, + "learning_rate": 2.9986988578863668e-05, + "loss": 0.3127, + "step": 6914, + "teacher_loss": 0.23798255622386932 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.26312679052352905, + "learning_rate": 2.9991325719242446e-05, + "loss": 0.2314, + "step": 6915, + "teacher_loss": 0.22791871428489685 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.1698855459690094, + "learning_rate": 2.9995662859621223e-05, + "loss": 0.1603, + "step": 6916, + "teacher_loss": 0.15923890471458435 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.5187318921089172, + "learning_rate": 3e-05, + "loss": 0.3849, + "step": 6917, + "teacher_loss": 0.37001535296440125 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.301098108291626, + "learning_rate": 2.9999999828047488e-05, + "loss": 0.2912, + "step": 6918, + "teacher_loss": 0.29009896516799927 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.18328280746936798, + "learning_rate": 2.9999999312189952e-05, + "loss": 0.2228, + "step": 6919, + "teacher_loss": 0.22715647518634796 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.5072847604751587, + "learning_rate": 2.99999984524274e-05, + "loss": 0.3987, + "step": 6920, + "teacher_loss": 0.3866388201713562 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.546576976776123, + "learning_rate": 2.9999997248759857e-05, + "loss": 0.2685, + "step": 6921, + "teacher_loss": 0.2375963032245636 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 1.1361072063446045, + "learning_rate": 2.9999995701187352e-05, + "loss": 0.5425, + "step": 6922, + "teacher_loss": 0.4765434265136719 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.3432465195655823, + "learning_rate": 2.9999993809709916e-05, + "loss": 0.2318, + "step": 6923, + "teacher_loss": 0.21941183507442474 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.3132544159889221, + "learning_rate": 2.9999991574327596e-05, + "loss": 0.2747, + "step": 6924, + "teacher_loss": 0.27045562863349915 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.2802899479866028, + "learning_rate": 2.999998899504044e-05, + "loss": 0.2751, + "step": 6925, + "teacher_loss": 0.27454817295074463 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.3673436641693115, + "learning_rate": 2.999998607184851e-05, + "loss": 0.3004, + "step": 6926, + "teacher_loss": 0.29298263788223267 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.684203028678894, + "learning_rate": 2.999998280475187e-05, + "loss": 0.4967, + "step": 6927, + "teacher_loss": 0.47590622305870056 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.6588788032531738, + "learning_rate": 2.9999979193750598e-05, + "loss": 0.2766, + "step": 6928, + "teacher_loss": 0.23409071564674377 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.28143876791000366, + "learning_rate": 2.9999975238844774e-05, + "loss": 0.2499, + "step": 6929, + "teacher_loss": 0.24644330143928528 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.39426904916763306, + "learning_rate": 2.999997094003449e-05, + "loss": 0.2554, + "step": 6930, + "teacher_loss": 0.23998206853866577 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.6348534822463989, + "learning_rate": 2.9999966297319848e-05, + "loss": 0.2804, + "step": 6931, + "teacher_loss": 0.24103891849517822 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.41482460498809814, + "learning_rate": 2.9999961310700946e-05, + "loss": 0.3531, + "step": 6932, + "teacher_loss": 0.34620028734207153 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.9377168416976929, + "learning_rate": 2.9999955980177908e-05, + "loss": 0.6643, + "step": 6933, + "teacher_loss": 0.6339671611785889 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.424083411693573, + "learning_rate": 2.9999950305750844e-05, + "loss": 0.2704, + "step": 6934, + "teacher_loss": 0.2533418834209442 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.189805805683136, + "learning_rate": 2.99999442874199e-05, + "loss": 0.1725, + "step": 6935, + "teacher_loss": 0.1705356240272522 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.6979736089706421, + "learning_rate": 2.9999937925185203e-05, + "loss": 0.3509, + "step": 6936, + "teacher_loss": 0.3123500943183899 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.43820276856422424, + "learning_rate": 2.99999312190469e-05, + "loss": 0.4557, + "step": 6937, + "teacher_loss": 0.45765697956085205 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.6402939558029175, + "learning_rate": 2.9999924169005146e-05, + "loss": 0.4397, + "step": 6938, + "teacher_loss": 0.4173782169818878 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.41320082545280457, + "learning_rate": 2.9999916775060108e-05, + "loss": 0.2059, + "step": 6939, + "teacher_loss": 0.18290458619594574 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.25503039360046387, + "learning_rate": 2.9999909037211945e-05, + "loss": 0.2367, + "step": 6940, + "teacher_loss": 0.23467963933944702 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.959038257598877, + "learning_rate": 2.9999900955460843e-05, + "loss": 0.4326, + "step": 6941, + "teacher_loss": 0.3740893602371216 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.7127895355224609, + "learning_rate": 2.999989252980698e-05, + "loss": 0.2587, + "step": 6942, + "teacher_loss": 0.2082202136516571 + }, + { + "compression_loss": 0.0, + "epoch": 1.25, + "label_loss": 0.41858986020088196, + "learning_rate": 2.9999883760250553e-05, + "loss": 0.3065, + "step": 6943, + "teacher_loss": 0.2939937114715576 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.3572905659675598, + "learning_rate": 2.999987464679177e-05, + "loss": 0.2274, + "step": 6944, + "teacher_loss": 0.21299190819263458 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.6799027323722839, + "learning_rate": 2.999986518943083e-05, + "loss": 0.2383, + "step": 6945, + "teacher_loss": 0.18928247690200806 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.3660587668418884, + "learning_rate": 2.9999855388167953e-05, + "loss": 0.273, + "step": 6946, + "teacher_loss": 0.26270514726638794 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.3960840106010437, + "learning_rate": 2.9999845243003365e-05, + "loss": 0.2293, + "step": 6947, + "teacher_loss": 0.2107643187046051 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.3464674949645996, + "learning_rate": 2.9999834753937294e-05, + "loss": 0.2427, + "step": 6948, + "teacher_loss": 0.23114603757858276 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.34996238350868225, + "learning_rate": 2.9999823920969986e-05, + "loss": 0.2348, + "step": 6949, + "teacher_loss": 0.22201739251613617 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.2559641897678375, + "learning_rate": 2.9999812744101686e-05, + "loss": 0.1982, + "step": 6950, + "teacher_loss": 0.19174398481845856 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.24504128098487854, + "learning_rate": 2.9999801223332654e-05, + "loss": 0.2475, + "step": 6951, + "teacher_loss": 0.24778872728347778 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.6323034763336182, + "learning_rate": 2.9999789358663152e-05, + "loss": 0.3495, + "step": 6952, + "teacher_loss": 0.31812146306037903 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.6625404953956604, + "learning_rate": 2.999977715009345e-05, + "loss": 0.2925, + "step": 6953, + "teacher_loss": 0.25139766931533813 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.3109323978424072, + "learning_rate": 2.999976459762383e-05, + "loss": 0.2439, + "step": 6954, + "teacher_loss": 0.23647215962409973 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.6176930069923401, + "learning_rate": 2.9999751701254575e-05, + "loss": 0.2786, + "step": 6955, + "teacher_loss": 0.24094988405704498 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.5596502423286438, + "learning_rate": 2.9999738460985993e-05, + "loss": 0.3511, + "step": 6956, + "teacher_loss": 0.32790353894233704 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.20909713208675385, + "learning_rate": 2.999972487681838e-05, + "loss": 0.2004, + "step": 6957, + "teacher_loss": 0.19944703578948975 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.2424902319908142, + "learning_rate": 2.9999710948752037e-05, + "loss": 0.1666, + "step": 6958, + "teacher_loss": 0.15821288526058197 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.4242701232433319, + "learning_rate": 2.99996966767873e-05, + "loss": 0.2311, + "step": 6959, + "teacher_loss": 0.20968323945999146 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.2454378306865692, + "learning_rate": 2.999968206092449e-05, + "loss": 0.2263, + "step": 6960, + "teacher_loss": 0.2242169976234436 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.7741588354110718, + "learning_rate": 2.9999667101163943e-05, + "loss": 0.2586, + "step": 6961, + "teacher_loss": 0.20134606957435608 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.32335567474365234, + "learning_rate": 2.9999651797505995e-05, + "loss": 0.2167, + "step": 6962, + "teacher_loss": 0.204800546169281 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.5957107543945312, + "learning_rate": 2.9999636149951007e-05, + "loss": 0.3502, + "step": 6963, + "teacher_loss": 0.3229502737522125 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.3543875813484192, + "learning_rate": 2.9999620158499334e-05, + "loss": 0.3022, + "step": 6964, + "teacher_loss": 0.29643508791923523 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.18114180862903595, + "learning_rate": 2.999960382315134e-05, + "loss": 0.2001, + "step": 6965, + "teacher_loss": 0.20221254229545593 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.2476223111152649, + "learning_rate": 2.9999587143907402e-05, + "loss": 0.1979, + "step": 6966, + "teacher_loss": 0.19242295622825623 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.5913947820663452, + "learning_rate": 2.9999570120767902e-05, + "loss": 0.2916, + "step": 6967, + "teacher_loss": 0.25832730531692505 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.41619378328323364, + "learning_rate": 2.999955275373323e-05, + "loss": 0.2489, + "step": 6968, + "teacher_loss": 0.23034590482711792 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.2973743677139282, + "learning_rate": 2.9999535042803782e-05, + "loss": 0.1976, + "step": 6969, + "teacher_loss": 0.18653151392936707 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.33213841915130615, + "learning_rate": 2.9999516987979972e-05, + "loss": 0.2726, + "step": 6970, + "teacher_loss": 0.26597338914871216 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.18278184533119202, + "learning_rate": 2.9999498589262204e-05, + "loss": 0.1717, + "step": 6971, + "teacher_loss": 0.17041435837745667 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.756203830242157, + "learning_rate": 2.9999479846650904e-05, + "loss": 0.3278, + "step": 6972, + "teacher_loss": 0.28016287088394165 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.3646557033061981, + "learning_rate": 2.9999460760146503e-05, + "loss": 0.2077, + "step": 6973, + "teacher_loss": 0.19028012454509735 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.4121394455432892, + "learning_rate": 2.999944132974944e-05, + "loss": 0.2106, + "step": 6974, + "teacher_loss": 0.18819761276245117 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.5164729952812195, + "learning_rate": 2.9999421555460155e-05, + "loss": 0.2277, + "step": 6975, + "teacher_loss": 0.19556701183319092 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.5639902353286743, + "learning_rate": 2.99994014372791e-05, + "loss": 0.2336, + "step": 6976, + "teacher_loss": 0.1969141960144043 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.1770978718996048, + "learning_rate": 2.999938097520675e-05, + "loss": 0.1738, + "step": 6977, + "teacher_loss": 0.17346912622451782 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.61199551820755, + "learning_rate": 2.9999360169243556e-05, + "loss": 0.3848, + "step": 6978, + "teacher_loss": 0.3595999777317047 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.35419583320617676, + "learning_rate": 2.999933901939001e-05, + "loss": 0.1854, + "step": 6979, + "teacher_loss": 0.16664013266563416 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.516225278377533, + "learning_rate": 2.999931752564659e-05, + "loss": 0.2545, + "step": 6980, + "teacher_loss": 0.22547045350074768 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 1.218553066253662, + "learning_rate": 2.999929568801379e-05, + "loss": 0.7922, + "step": 6981, + "teacher_loss": 0.7448649406433105 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.5328869223594666, + "learning_rate": 2.9999273506492104e-05, + "loss": 0.2671, + "step": 6982, + "teacher_loss": 0.23754283785820007 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.40544432401657104, + "learning_rate": 2.9999250981082053e-05, + "loss": 0.305, + "step": 6983, + "teacher_loss": 0.29385462403297424 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.5449610352516174, + "learning_rate": 2.9999228111784143e-05, + "loss": 0.2381, + "step": 6984, + "teacher_loss": 0.2040400356054306 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.285076379776001, + "learning_rate": 2.9999204898598907e-05, + "loss": 0.1831, + "step": 6985, + "teacher_loss": 0.1717534214258194 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.5044917464256287, + "learning_rate": 2.999918134152687e-05, + "loss": 0.2475, + "step": 6986, + "teacher_loss": 0.21893665194511414 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.3204505443572998, + "learning_rate": 2.999915744056857e-05, + "loss": 0.2422, + "step": 6987, + "teacher_loss": 0.2334899604320526 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.4988766312599182, + "learning_rate": 2.9999133195724563e-05, + "loss": 0.4315, + "step": 6988, + "teacher_loss": 0.42405635118484497 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.43875131011009216, + "learning_rate": 2.9999108606995405e-05, + "loss": 0.2377, + "step": 6989, + "teacher_loss": 0.21536210179328918 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.307504802942276, + "learning_rate": 2.9999083674381658e-05, + "loss": 0.3261, + "step": 6990, + "teacher_loss": 0.328177809715271 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.3905857801437378, + "learning_rate": 2.999905839788388e-05, + "loss": 0.308, + "step": 6991, + "teacher_loss": 0.2988031208515167 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.6156218647956848, + "learning_rate": 2.9999032777502675e-05, + "loss": 0.2405, + "step": 6992, + "teacher_loss": 0.1988511085510254 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.5167202949523926, + "learning_rate": 2.9999006813238615e-05, + "loss": 0.5218, + "step": 6993, + "teacher_loss": 0.522392213344574 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.09809376299381256, + "learning_rate": 2.9998980505092296e-05, + "loss": 0.1933, + "step": 6994, + "teacher_loss": 0.20384229719638824 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.29984307289123535, + "learning_rate": 2.999895385306432e-05, + "loss": 0.2412, + "step": 6995, + "teacher_loss": 0.23470190167427063 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.5619527101516724, + "learning_rate": 2.9998926857155306e-05, + "loss": 0.1881, + "step": 6996, + "teacher_loss": 0.14651578664779663 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.5999884605407715, + "learning_rate": 2.9998899517365866e-05, + "loss": 0.2291, + "step": 6997, + "teacher_loss": 0.1878499984741211 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.317263126373291, + "learning_rate": 2.9998871833696633e-05, + "loss": 0.2472, + "step": 6998, + "teacher_loss": 0.23938389122486115 + }, + { + "compression_loss": 0.0, + "epoch": 1.26, + "label_loss": 0.3636245131492615, + "learning_rate": 2.9998843806148235e-05, + "loss": 0.1898, + "step": 6999, + "teacher_loss": 0.17049682140350342 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.5415074229240417, + "learning_rate": 2.999881543472132e-05, + "loss": 0.2611, + "step": 7000, + "teacher_loss": 0.2299949675798416 + }, + { + "epoch": 1.27, + "eval_exact_match": 78.97824030274361, + "eval_f1": 86.59077935465757, + "step": 7000 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.812138557434082, + "learning_rate": 2.9998786719416534e-05, + "loss": 0.3343, + "step": 7001, + "teacher_loss": 0.2812288999557495 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.21095333993434906, + "learning_rate": 2.9998757660234536e-05, + "loss": 0.2068, + "step": 7002, + "teacher_loss": 0.20632602274417877 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.3168070316314697, + "learning_rate": 2.9998728257175992e-05, + "loss": 0.2739, + "step": 7003, + "teacher_loss": 0.2691390812397003 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.2506276071071625, + "learning_rate": 2.999869851024158e-05, + "loss": 0.2158, + "step": 7004, + "teacher_loss": 0.21193963289260864 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.5702749490737915, + "learning_rate": 2.999866841943198e-05, + "loss": 0.2747, + "step": 7005, + "teacher_loss": 0.24183884263038635 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.7808693647384644, + "learning_rate": 2.9998637984747883e-05, + "loss": 0.4939, + "step": 7006, + "teacher_loss": 0.46204569935798645 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.08443786948919296, + "learning_rate": 2.999860720618998e-05, + "loss": 0.1544, + "step": 7007, + "teacher_loss": 0.16213035583496094 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.7030962705612183, + "learning_rate": 2.9998576083758987e-05, + "loss": 0.3729, + "step": 7008, + "teacher_loss": 0.3362564742565155 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.6169146299362183, + "learning_rate": 2.999854461745561e-05, + "loss": 0.3939, + "step": 7009, + "teacher_loss": 0.36915820837020874 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.32285699248313904, + "learning_rate": 2.9998512807280573e-05, + "loss": 0.1998, + "step": 7010, + "teacher_loss": 0.18611471354961395 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.5274523496627808, + "learning_rate": 2.9998480653234607e-05, + "loss": 0.2732, + "step": 7011, + "teacher_loss": 0.24495935440063477 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.4237682819366455, + "learning_rate": 2.9998448155318445e-05, + "loss": 0.3258, + "step": 7012, + "teacher_loss": 0.31496158242225647 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.44937026500701904, + "learning_rate": 2.9998415313532835e-05, + "loss": 0.3272, + "step": 7013, + "teacher_loss": 0.31362441182136536 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.5593776702880859, + "learning_rate": 2.999838212787853e-05, + "loss": 0.4883, + "step": 7014, + "teacher_loss": 0.4803839921951294 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.6155896186828613, + "learning_rate": 2.999834859835629e-05, + "loss": 0.2935, + "step": 7015, + "teacher_loss": 0.2577052116394043 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.8464725017547607, + "learning_rate": 2.9998314724966886e-05, + "loss": 0.3437, + "step": 7016, + "teacher_loss": 0.2878072261810303 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.40568894147872925, + "learning_rate": 2.999828050771109e-05, + "loss": 0.1958, + "step": 7017, + "teacher_loss": 0.1725206971168518 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.9212563633918762, + "learning_rate": 2.999824594658969e-05, + "loss": 0.3004, + "step": 7018, + "teacher_loss": 0.23143593966960907 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.2578338384628296, + "learning_rate": 2.9998211041603477e-05, + "loss": 0.1723, + "step": 7019, + "teacher_loss": 0.162795752286911 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.5548545122146606, + "learning_rate": 2.999817579275325e-05, + "loss": 0.3069, + "step": 7020, + "teacher_loss": 0.2793705463409424 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.41252005100250244, + "learning_rate": 2.9998140200039827e-05, + "loss": 0.2356, + "step": 7021, + "teacher_loss": 0.21598170697689056 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.8479819893836975, + "learning_rate": 2.9998104263464005e-05, + "loss": 0.3621, + "step": 7022, + "teacher_loss": 0.30815866589546204 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.3009198307991028, + "learning_rate": 2.9998067983026624e-05, + "loss": 0.2491, + "step": 7023, + "teacher_loss": 0.24330535531044006 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.32685768604278564, + "learning_rate": 2.9998031358728514e-05, + "loss": 0.2254, + "step": 7024, + "teacher_loss": 0.2141449898481369 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.3330312967300415, + "learning_rate": 2.9997994390570507e-05, + "loss": 0.2375, + "step": 7025, + "teacher_loss": 0.2269349992275238 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.36006152629852295, + "learning_rate": 2.9997957078553458e-05, + "loss": 0.2864, + "step": 7026, + "teacher_loss": 0.2782706618309021 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.412614643573761, + "learning_rate": 2.9997919422678214e-05, + "loss": 0.1891, + "step": 7027, + "teacher_loss": 0.16427794098854065 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.5532370805740356, + "learning_rate": 2.999788142294565e-05, + "loss": 0.3049, + "step": 7028, + "teacher_loss": 0.2773568630218506 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.676288902759552, + "learning_rate": 2.999784307935663e-05, + "loss": 0.2681, + "step": 7029, + "teacher_loss": 0.22275730967521667 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.4067785143852234, + "learning_rate": 2.9997804391912028e-05, + "loss": 0.2385, + "step": 7030, + "teacher_loss": 0.21977736055850983 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 1.1693761348724365, + "learning_rate": 2.999776536061274e-05, + "loss": 0.3556, + "step": 7031, + "teacher_loss": 0.2652049660682678 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.8631430864334106, + "learning_rate": 2.9997725985459663e-05, + "loss": 0.276, + "step": 7032, + "teacher_loss": 0.21076250076293945 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.7710654735565186, + "learning_rate": 2.999768626645369e-05, + "loss": 0.694, + "step": 7033, + "teacher_loss": 0.6854475140571594 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.19282880425453186, + "learning_rate": 2.9997646203595734e-05, + "loss": 0.1962, + "step": 7034, + "teacher_loss": 0.19652104377746582 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.3251870274543762, + "learning_rate": 2.9997605796886722e-05, + "loss": 0.2064, + "step": 7035, + "teacher_loss": 0.19319823384284973 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.36963504552841187, + "learning_rate": 2.999756504632757e-05, + "loss": 0.2815, + "step": 7036, + "teacher_loss": 0.2717294991016388 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.6764625310897827, + "learning_rate": 2.999752395191922e-05, + "loss": 0.2891, + "step": 7037, + "teacher_loss": 0.24601122736930847 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.09965810179710388, + "learning_rate": 2.9997482513662605e-05, + "loss": 0.1486, + "step": 7038, + "teacher_loss": 0.1540473997592926 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.6917380690574646, + "learning_rate": 2.9997440731558685e-05, + "loss": 0.3847, + "step": 7039, + "teacher_loss": 0.350607693195343 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 1.0150301456451416, + "learning_rate": 2.9997398605608415e-05, + "loss": 0.3568, + "step": 7040, + "teacher_loss": 0.28369972109794617 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.5362294912338257, + "learning_rate": 2.9997356135812756e-05, + "loss": 0.2874, + "step": 7041, + "teacher_loss": 0.2597554326057434 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.6085846424102783, + "learning_rate": 2.999731332217269e-05, + "loss": 0.3735, + "step": 7042, + "teacher_loss": 0.34737643599510193 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.5395544767379761, + "learning_rate": 2.9997270164689188e-05, + "loss": 0.3561, + "step": 7043, + "teacher_loss": 0.3357166647911072 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.8649001717567444, + "learning_rate": 2.9997226663363247e-05, + "loss": 0.3615, + "step": 7044, + "teacher_loss": 0.30562055110931396 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.47918346524238586, + "learning_rate": 2.9997182818195862e-05, + "loss": 0.2675, + "step": 7045, + "teacher_loss": 0.2439352571964264 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.5492284297943115, + "learning_rate": 2.999713862918804e-05, + "loss": 0.3206, + "step": 7046, + "teacher_loss": 0.29521557688713074 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.398273229598999, + "learning_rate": 2.9997094096340794e-05, + "loss": 0.3009, + "step": 7047, + "teacher_loss": 0.29005739092826843 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.43202292919158936, + "learning_rate": 2.9997049219655144e-05, + "loss": 0.23, + "step": 7048, + "teacher_loss": 0.2075108140707016 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.20541566610336304, + "learning_rate": 2.9997003999132115e-05, + "loss": 0.23, + "step": 7049, + "teacher_loss": 0.23278382420539856 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 1.0899384021759033, + "learning_rate": 2.9996958434772755e-05, + "loss": 0.4126, + "step": 7050, + "teacher_loss": 0.3373585641384125 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.15908819437026978, + "learning_rate": 2.9996912526578096e-05, + "loss": 0.193, + "step": 7051, + "teacher_loss": 0.19680100679397583 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.22236061096191406, + "learning_rate": 2.9996866274549193e-05, + "loss": 0.2171, + "step": 7052, + "teacher_loss": 0.21649497747421265 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.4085918366909027, + "learning_rate": 2.9996819678687113e-05, + "loss": 0.2229, + "step": 7053, + "teacher_loss": 0.20228153467178345 + }, + { + "compression_loss": 0.0, + "epoch": 1.27, + "label_loss": 0.34753674268722534, + "learning_rate": 2.9996772738992923e-05, + "loss": 0.3218, + "step": 7054, + "teacher_loss": 0.318892240524292 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.34627026319503784, + "learning_rate": 2.9996725455467693e-05, + "loss": 0.2098, + "step": 7055, + "teacher_loss": 0.19466429948806763 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.23840349912643433, + "learning_rate": 2.9996677828112512e-05, + "loss": 0.1899, + "step": 7056, + "teacher_loss": 0.18447700142860413 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.3372713029384613, + "learning_rate": 2.9996629856928473e-05, + "loss": 0.2061, + "step": 7057, + "teacher_loss": 0.19154666364192963 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.5389776229858398, + "learning_rate": 2.999658154191667e-05, + "loss": 0.3625, + "step": 7058, + "teacher_loss": 0.3428640067577362 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.9021604657173157, + "learning_rate": 2.9996532883078218e-05, + "loss": 0.5237, + "step": 7059, + "teacher_loss": 0.4817003607749939 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.6048001050949097, + "learning_rate": 2.9996483880414224e-05, + "loss": 0.289, + "step": 7060, + "teacher_loss": 0.2539489269256592 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.5220537185668945, + "learning_rate": 2.9996434533925822e-05, + "loss": 0.2579, + "step": 7061, + "teacher_loss": 0.22858746349811554 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.7754589319229126, + "learning_rate": 2.9996384843614135e-05, + "loss": 0.4231, + "step": 7062, + "teacher_loss": 0.3839249908924103 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 1.1422266960144043, + "learning_rate": 2.9996334809480303e-05, + "loss": 0.3817, + "step": 7063, + "teacher_loss": 0.297219455242157 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.46671149134635925, + "learning_rate": 2.999628443152548e-05, + "loss": 0.207, + "step": 7064, + "teacher_loss": 0.17811360955238342 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.2162889838218689, + "learning_rate": 2.9996233709750814e-05, + "loss": 0.2023, + "step": 7065, + "teacher_loss": 0.20074713230133057 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.7021883726119995, + "learning_rate": 2.9996182644157467e-05, + "loss": 0.3715, + "step": 7066, + "teacher_loss": 0.33478182554244995 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.32095256447792053, + "learning_rate": 2.9996131234746613e-05, + "loss": 0.3045, + "step": 7067, + "teacher_loss": 0.3027231693267822 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.5422878265380859, + "learning_rate": 2.9996079481519435e-05, + "loss": 0.29, + "step": 7068, + "teacher_loss": 0.26200637221336365 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.1546006202697754, + "learning_rate": 2.9996027384477114e-05, + "loss": 0.2669, + "step": 7069, + "teacher_loss": 0.2793290615081787 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 1.2620408535003662, + "learning_rate": 2.9995974943620844e-05, + "loss": 0.4436, + "step": 7070, + "teacher_loss": 0.35264214873313904 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.2649785876274109, + "learning_rate": 2.9995922158951827e-05, + "loss": 0.3955, + "step": 7071, + "teacher_loss": 0.4100552499294281 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.43429136276245117, + "learning_rate": 2.999586903047128e-05, + "loss": 0.2507, + "step": 7072, + "teacher_loss": 0.23028427362442017 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.9815129041671753, + "learning_rate": 2.999581555818041e-05, + "loss": 0.3295, + "step": 7073, + "teacher_loss": 0.2571081519126892 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.47569000720977783, + "learning_rate": 2.9995761742080454e-05, + "loss": 0.1801, + "step": 7074, + "teacher_loss": 0.14725643396377563 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.19973039627075195, + "learning_rate": 2.999570758217264e-05, + "loss": 0.2124, + "step": 7075, + "teacher_loss": 0.21381694078445435 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.3188217282295227, + "learning_rate": 2.999565307845821e-05, + "loss": 0.2721, + "step": 7076, + "teacher_loss": 0.26690739393234253 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.5309109091758728, + "learning_rate": 2.9995598230938416e-05, + "loss": 0.2501, + "step": 7077, + "teacher_loss": 0.21892693638801575 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.36627280712127686, + "learning_rate": 2.999554303961451e-05, + "loss": 0.2569, + "step": 7078, + "teacher_loss": 0.24470072984695435 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.6151841878890991, + "learning_rate": 2.999548750448776e-05, + "loss": 0.297, + "step": 7079, + "teacher_loss": 0.26165279746055603 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.2857910990715027, + "learning_rate": 2.9995431625559445e-05, + "loss": 0.2861, + "step": 7080, + "teacher_loss": 0.2861219644546509 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.511077344417572, + "learning_rate": 2.999537540283084e-05, + "loss": 0.3178, + "step": 7081, + "teacher_loss": 0.2963334321975708 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.4285435676574707, + "learning_rate": 2.9995318836303235e-05, + "loss": 0.333, + "step": 7082, + "teacher_loss": 0.3224106431007385 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.38699859380722046, + "learning_rate": 2.999526192597793e-05, + "loss": 0.1956, + "step": 7083, + "teacher_loss": 0.17428764700889587 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.5516075491905212, + "learning_rate": 2.999520467185622e-05, + "loss": 0.2553, + "step": 7084, + "teacher_loss": 0.2223854809999466 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.5824899077415466, + "learning_rate": 2.999514707393943e-05, + "loss": 0.2916, + "step": 7085, + "teacher_loss": 0.2593334913253784 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.49483001232147217, + "learning_rate": 2.9995089132228877e-05, + "loss": 0.3306, + "step": 7086, + "teacher_loss": 0.31230786442756653 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.40986403822898865, + "learning_rate": 2.9995030846725886e-05, + "loss": 0.3109, + "step": 7087, + "teacher_loss": 0.2999221086502075 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.419546514749527, + "learning_rate": 2.9994972217431796e-05, + "loss": 0.2665, + "step": 7088, + "teacher_loss": 0.24949800968170166 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.2925000786781311, + "learning_rate": 2.9994913244347947e-05, + "loss": 0.2494, + "step": 7089, + "teacher_loss": 0.2446572333574295 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.29219719767570496, + "learning_rate": 2.9994853927475695e-05, + "loss": 0.2532, + "step": 7090, + "teacher_loss": 0.24883972108364105 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.2977334260940552, + "learning_rate": 2.9994794266816398e-05, + "loss": 0.2466, + "step": 7091, + "teacher_loss": 0.24091535806655884 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.6371045112609863, + "learning_rate": 2.9994734262371433e-05, + "loss": 0.2041, + "step": 7092, + "teacher_loss": 0.15596921741962433 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.35970252752304077, + "learning_rate": 2.999467391414216e-05, + "loss": 0.2361, + "step": 7093, + "teacher_loss": 0.22237008810043335 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.6484271287918091, + "learning_rate": 2.9994613222129977e-05, + "loss": 0.2918, + "step": 7094, + "teacher_loss": 0.2521324157714844 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.6021228432655334, + "learning_rate": 2.9994552186336265e-05, + "loss": 0.6345, + "step": 7095, + "teacher_loss": 0.6381402015686035 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.3679259717464447, + "learning_rate": 2.9994490806762428e-05, + "loss": 0.2794, + "step": 7096, + "teacher_loss": 0.2695087790489197 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.37677910923957825, + "learning_rate": 2.999442908340987e-05, + "loss": 0.3153, + "step": 7097, + "teacher_loss": 0.3084523677825928 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.7784925699234009, + "learning_rate": 2.999436701628001e-05, + "loss": 0.4028, + "step": 7098, + "teacher_loss": 0.3610305190086365 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.7022049427032471, + "learning_rate": 2.999430460537427e-05, + "loss": 0.4192, + "step": 7099, + "teacher_loss": 0.3877614438533783 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.682978630065918, + "learning_rate": 2.999424185069408e-05, + "loss": 0.4808, + "step": 7100, + "teacher_loss": 0.45838505029678345 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.6455715298652649, + "learning_rate": 2.9994178752240885e-05, + "loss": 0.2615, + "step": 7101, + "teacher_loss": 0.21881821751594543 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.4900006651878357, + "learning_rate": 2.9994115310016124e-05, + "loss": 0.2626, + "step": 7102, + "teacher_loss": 0.2373484969139099 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.27053219079971313, + "learning_rate": 2.999405152402125e-05, + "loss": 0.1927, + "step": 7103, + "teacher_loss": 0.18406102061271667 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.3029423952102661, + "learning_rate": 2.9993987394257732e-05, + "loss": 0.2984, + "step": 7104, + "teacher_loss": 0.2978520393371582 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.2939951419830322, + "learning_rate": 2.9993922920727034e-05, + "loss": 0.2282, + "step": 7105, + "teacher_loss": 0.22093728184700012 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.7132244110107422, + "learning_rate": 2.999385810343064e-05, + "loss": 0.246, + "step": 7106, + "teacher_loss": 0.1940385401248932 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.38666030764579773, + "learning_rate": 2.999379294237003e-05, + "loss": 0.2666, + "step": 7107, + "teacher_loss": 0.2532583177089691 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.6941266655921936, + "learning_rate": 2.9993727437546708e-05, + "loss": 0.3745, + "step": 7108, + "teacher_loss": 0.3390410840511322 + }, + { + "compression_loss": 0.0, + "epoch": 1.28, + "label_loss": 0.39075225591659546, + "learning_rate": 2.9993661588962165e-05, + "loss": 0.2317, + "step": 7109, + "teacher_loss": 0.21402406692504883 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.25054943561553955, + "learning_rate": 2.9993595396617916e-05, + "loss": 0.2321, + "step": 7110, + "teacher_loss": 0.2300378382205963 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.3752928674221039, + "learning_rate": 2.999352886051548e-05, + "loss": 0.2067, + "step": 7111, + "teacher_loss": 0.18799328804016113 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.22528742253780365, + "learning_rate": 2.999346198065638e-05, + "loss": 0.2084, + "step": 7112, + "teacher_loss": 0.20649507641792297 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.799654483795166, + "learning_rate": 2.9993394757042144e-05, + "loss": 0.4136, + "step": 7113, + "teacher_loss": 0.37069135904312134 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.3277563452720642, + "learning_rate": 2.9993327189674323e-05, + "loss": 0.2022, + "step": 7114, + "teacher_loss": 0.1882563829421997 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.47933632135391235, + "learning_rate": 2.999325927855446e-05, + "loss": 0.2216, + "step": 7115, + "teacher_loss": 0.19295820593833923 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.7805335521697998, + "learning_rate": 2.9993191023684117e-05, + "loss": 0.4002, + "step": 7116, + "teacher_loss": 0.3579510748386383 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.26992034912109375, + "learning_rate": 2.9993122425064853e-05, + "loss": 0.2685, + "step": 7117, + "teacher_loss": 0.2683701515197754 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.3277221918106079, + "learning_rate": 2.9993053482698246e-05, + "loss": 0.2403, + "step": 7118, + "teacher_loss": 0.23059161007404327 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.9460057616233826, + "learning_rate": 2.999298419658587e-05, + "loss": 0.3593, + "step": 7119, + "teacher_loss": 0.2940809428691864 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.443335622549057, + "learning_rate": 2.9992914566729322e-05, + "loss": 0.2804, + "step": 7120, + "teacher_loss": 0.2623181939125061 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.4839046001434326, + "learning_rate": 2.9992844593130192e-05, + "loss": 0.3337, + "step": 7121, + "teacher_loss": 0.3170493245124817 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.5446857810020447, + "learning_rate": 2.9992774275790086e-05, + "loss": 0.3235, + "step": 7122, + "teacher_loss": 0.2989093065261841 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.31440305709838867, + "learning_rate": 2.9992703614710617e-05, + "loss": 0.2488, + "step": 7123, + "teacher_loss": 0.24146537482738495 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.8490537405014038, + "learning_rate": 2.9992632609893404e-05, + "loss": 0.2901, + "step": 7124, + "teacher_loss": 0.22794979810714722 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.9768196940422058, + "learning_rate": 2.9992561261340078e-05, + "loss": 0.2772, + "step": 7125, + "teacher_loss": 0.19944468140602112 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.39531081914901733, + "learning_rate": 2.999248956905227e-05, + "loss": 0.2577, + "step": 7126, + "teacher_loss": 0.24241138994693756 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.46143633127212524, + "learning_rate": 2.9992417533031624e-05, + "loss": 0.3469, + "step": 7127, + "teacher_loss": 0.3341299295425415 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.7066437005996704, + "learning_rate": 2.9992345153279798e-05, + "loss": 0.3754, + "step": 7128, + "teacher_loss": 0.33864402770996094 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.3455314040184021, + "learning_rate": 2.9992272429798444e-05, + "loss": 0.4258, + "step": 7129, + "teacher_loss": 0.4347544014453888 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.8617435693740845, + "learning_rate": 2.9992199362589232e-05, + "loss": 0.2916, + "step": 7130, + "teacher_loss": 0.22819873690605164 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.5560719966888428, + "learning_rate": 2.999212595165384e-05, + "loss": 0.3574, + "step": 7131, + "teacher_loss": 0.33533573150634766 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.39647501707077026, + "learning_rate": 2.999205219699395e-05, + "loss": 0.2981, + "step": 7132, + "teacher_loss": 0.2871958017349243 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.8323809504508972, + "learning_rate": 2.999197809861125e-05, + "loss": 0.2977, + "step": 7133, + "teacher_loss": 0.23826347291469574 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.1918633133172989, + "learning_rate": 2.999190365650744e-05, + "loss": 0.2057, + "step": 7134, + "teacher_loss": 0.20724640786647797 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.6777631044387817, + "learning_rate": 2.9991828870684224e-05, + "loss": 0.4649, + "step": 7135, + "teacher_loss": 0.44128215312957764 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.2537914216518402, + "learning_rate": 2.9991753741143323e-05, + "loss": 0.2023, + "step": 7136, + "teacher_loss": 0.19660988450050354 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.4911045432090759, + "learning_rate": 2.9991678267886458e-05, + "loss": 0.4027, + "step": 7137, + "teacher_loss": 0.3928261399269104 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.6986756920814514, + "learning_rate": 2.9991602450915355e-05, + "loss": 0.4151, + "step": 7138, + "teacher_loss": 0.38354456424713135 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.3526952266693115, + "learning_rate": 2.9991526290231757e-05, + "loss": 0.3109, + "step": 7139, + "teacher_loss": 0.306215763092041 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.7459595203399658, + "learning_rate": 2.9991449785837405e-05, + "loss": 0.3501, + "step": 7140, + "teacher_loss": 0.30617064237594604 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.6981514096260071, + "learning_rate": 2.9991372937734057e-05, + "loss": 0.2819, + "step": 7141, + "teacher_loss": 0.23560896515846252 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.31977200508117676, + "learning_rate": 2.9991295745923476e-05, + "loss": 0.2066, + "step": 7142, + "teacher_loss": 0.1939723640680313 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.47662538290023804, + "learning_rate": 2.999121821040743e-05, + "loss": 0.2765, + "step": 7143, + "teacher_loss": 0.25425055623054504 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.5940206050872803, + "learning_rate": 2.9991140331187695e-05, + "loss": 0.4467, + "step": 7144, + "teacher_loss": 0.4303293824195862 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.3084886968135834, + "learning_rate": 2.9991062108266058e-05, + "loss": 0.2447, + "step": 7145, + "teacher_loss": 0.2376406490802765 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.6255487203598022, + "learning_rate": 2.999098354164431e-05, + "loss": 0.2428, + "step": 7146, + "teacher_loss": 0.20024079084396362 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.3910936713218689, + "learning_rate": 2.999090463132426e-05, + "loss": 0.2731, + "step": 7147, + "teacher_loss": 0.2599979639053345 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.36146464943885803, + "learning_rate": 2.999082537730771e-05, + "loss": 0.232, + "step": 7148, + "teacher_loss": 0.21756085753440857 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.48681795597076416, + "learning_rate": 2.9990745779596477e-05, + "loss": 0.2883, + "step": 7149, + "teacher_loss": 0.2662583589553833 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.28821074962615967, + "learning_rate": 2.999066583819239e-05, + "loss": 0.1824, + "step": 7150, + "teacher_loss": 0.17066849768161774 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.360034704208374, + "learning_rate": 2.9990585553097278e-05, + "loss": 0.2121, + "step": 7151, + "teacher_loss": 0.19562184810638428 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.3686564862728119, + "learning_rate": 2.9990504924312982e-05, + "loss": 0.2756, + "step": 7152, + "teacher_loss": 0.2652715742588043 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 1.0196260213851929, + "learning_rate": 2.9990423951841355e-05, + "loss": 0.46, + "step": 7153, + "teacher_loss": 0.3978257477283478 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.47543373703956604, + "learning_rate": 2.9990342635684245e-05, + "loss": 0.3111, + "step": 7154, + "teacher_loss": 0.29283860325813293 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.36916032433509827, + "learning_rate": 2.999026097584353e-05, + "loss": 0.2307, + "step": 7155, + "teacher_loss": 0.21527311205863953 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.2077256739139557, + "learning_rate": 2.9990178972321073e-05, + "loss": 0.2493, + "step": 7156, + "teacher_loss": 0.2538982629776001 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.4414152503013611, + "learning_rate": 2.9990096625118747e-05, + "loss": 0.2288, + "step": 7157, + "teacher_loss": 0.20521017909049988 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.7102265954017639, + "learning_rate": 2.9990013934238453e-05, + "loss": 0.3283, + "step": 7158, + "teacher_loss": 0.2858346700668335 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.3479458689689636, + "learning_rate": 2.9989930899682084e-05, + "loss": 0.2229, + "step": 7159, + "teacher_loss": 0.20900848507881165 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.5895493626594543, + "learning_rate": 2.9989847521451542e-05, + "loss": 0.2572, + "step": 7160, + "teacher_loss": 0.22029028832912445 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.5886654853820801, + "learning_rate": 2.9989763799548735e-05, + "loss": 0.2714, + "step": 7161, + "teacher_loss": 0.23613232374191284 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.7484886646270752, + "learning_rate": 2.9989679733975588e-05, + "loss": 0.4315, + "step": 7162, + "teacher_loss": 0.3962434232234955 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.37206408381462097, + "learning_rate": 2.9989595324734022e-05, + "loss": 0.2553, + "step": 7163, + "teacher_loss": 0.24228841066360474 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.4293583631515503, + "learning_rate": 2.998951057182598e-05, + "loss": 0.3142, + "step": 7164, + "teacher_loss": 0.3014185428619385 + }, + { + "compression_loss": 0.0, + "epoch": 1.29, + "label_loss": 0.5553855299949646, + "learning_rate": 2.9989425475253398e-05, + "loss": 0.2994, + "step": 7165, + "teacher_loss": 0.2709383964538574 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.48882007598876953, + "learning_rate": 2.9989340035018233e-05, + "loss": 0.3505, + "step": 7166, + "teacher_loss": 0.335141122341156 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.7337089776992798, + "learning_rate": 2.9989254251122445e-05, + "loss": 0.5625, + "step": 7167, + "teacher_loss": 0.5434370040893555 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.18416103720664978, + "learning_rate": 2.998916812356799e-05, + "loss": 0.2409, + "step": 7168, + "teacher_loss": 0.24715732038021088 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.3693809509277344, + "learning_rate": 2.9989081652356853e-05, + "loss": 0.2378, + "step": 7169, + "teacher_loss": 0.2231893688440323 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.8484172821044922, + "learning_rate": 2.9988994837491017e-05, + "loss": 0.3488, + "step": 7170, + "teacher_loss": 0.29326844215393066 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.26507607102394104, + "learning_rate": 2.9988907678972462e-05, + "loss": 0.1844, + "step": 7171, + "teacher_loss": 0.1754055917263031 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.30666136741638184, + "learning_rate": 2.9988820176803194e-05, + "loss": 0.2726, + "step": 7172, + "teacher_loss": 0.26880425214767456 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.5847693681716919, + "learning_rate": 2.9988732330985223e-05, + "loss": 0.2438, + "step": 7173, + "teacher_loss": 0.20587779581546783 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.8975269794464111, + "learning_rate": 2.9988644141520557e-05, + "loss": 0.6467, + "step": 7174, + "teacher_loss": 0.6188441514968872 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.7468587160110474, + "learning_rate": 2.998855560841122e-05, + "loss": 0.3792, + "step": 7175, + "teacher_loss": 0.3383748531341553 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.5169508457183838, + "learning_rate": 2.9988466731659236e-05, + "loss": 0.3108, + "step": 7176, + "teacher_loss": 0.2879098057746887 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.18669115006923676, + "learning_rate": 2.998837751126665e-05, + "loss": 0.2968, + "step": 7177, + "teacher_loss": 0.30907437205314636 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.45584535598754883, + "learning_rate": 2.9988287947235505e-05, + "loss": 0.301, + "step": 7178, + "teacher_loss": 0.28374212980270386 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.31353482604026794, + "learning_rate": 2.9988198039567853e-05, + "loss": 0.2093, + "step": 7179, + "teacher_loss": 0.197670117020607 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.47423866391181946, + "learning_rate": 2.998810778826576e-05, + "loss": 0.2429, + "step": 7180, + "teacher_loss": 0.21721352636814117 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.34817564487457275, + "learning_rate": 2.9988017193331294e-05, + "loss": 0.19, + "step": 7181, + "teacher_loss": 0.1724245250225067 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.4925878643989563, + "learning_rate": 2.9987926254766527e-05, + "loss": 0.2297, + "step": 7182, + "teacher_loss": 0.20054292678833008 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.4395211338996887, + "learning_rate": 2.9987834972573544e-05, + "loss": 0.2944, + "step": 7183, + "teacher_loss": 0.27822357416152954 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.7453950047492981, + "learning_rate": 2.9987743346754447e-05, + "loss": 0.3412, + "step": 7184, + "teacher_loss": 0.29634031653404236 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.3255147933959961, + "learning_rate": 2.9987651377311325e-05, + "loss": 0.1968, + "step": 7185, + "teacher_loss": 0.18244364857673645 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.5304484367370605, + "learning_rate": 2.9987559064246296e-05, + "loss": 0.2257, + "step": 7186, + "teacher_loss": 0.19182555377483368 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.6397098302841187, + "learning_rate": 2.998746640756147e-05, + "loss": 0.4704, + "step": 7187, + "teacher_loss": 0.4515638053417206 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.36433055996894836, + "learning_rate": 2.9987373407258977e-05, + "loss": 0.2345, + "step": 7188, + "teacher_loss": 0.22003450989723206 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.303589403629303, + "learning_rate": 2.9987280063340946e-05, + "loss": 0.2524, + "step": 7189, + "teacher_loss": 0.24666264653205872 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.46916067600250244, + "learning_rate": 2.9987186375809513e-05, + "loss": 0.2504, + "step": 7190, + "teacher_loss": 0.22612908482551575 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.19502362608909607, + "learning_rate": 2.9987092344666835e-05, + "loss": 0.237, + "step": 7191, + "teacher_loss": 0.2416379302740097 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.4044319987297058, + "learning_rate": 2.998699796991506e-05, + "loss": 0.2648, + "step": 7192, + "teacher_loss": 0.24926723539829254 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.33592790365219116, + "learning_rate": 2.998690325155636e-05, + "loss": 0.2243, + "step": 7193, + "teacher_loss": 0.2118675410747528 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.8964804410934448, + "learning_rate": 2.9986808189592897e-05, + "loss": 0.3862, + "step": 7194, + "teacher_loss": 0.32946592569351196 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.2786426842212677, + "learning_rate": 2.9986712784026857e-05, + "loss": 0.1845, + "step": 7195, + "teacher_loss": 0.17401817440986633 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.8743244409561157, + "learning_rate": 2.9986617034860425e-05, + "loss": 0.4471, + "step": 7196, + "teacher_loss": 0.3996553421020508 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.1617792248725891, + "learning_rate": 2.9986520942095797e-05, + "loss": 0.2246, + "step": 7197, + "teacher_loss": 0.23155635595321655 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.728060781955719, + "learning_rate": 2.9986424505735174e-05, + "loss": 0.6774, + "step": 7198, + "teacher_loss": 0.6718258857727051 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.4503127336502075, + "learning_rate": 2.998632772578077e-05, + "loss": 0.1932, + "step": 7199, + "teacher_loss": 0.16466861963272095 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.4381275773048401, + "learning_rate": 2.9986230602234804e-05, + "loss": 0.2348, + "step": 7200, + "teacher_loss": 0.2122519612312317 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.41315221786499023, + "learning_rate": 2.99861331350995e-05, + "loss": 0.1811, + "step": 7201, + "teacher_loss": 0.15530559420585632 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.18667566776275635, + "learning_rate": 2.998603532437709e-05, + "loss": 0.1219, + "step": 7202, + "teacher_loss": 0.11466941982507706 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.5891701579093933, + "learning_rate": 2.9985937170069825e-05, + "loss": 0.3042, + "step": 7203, + "teacher_loss": 0.2725449800491333 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.4828670024871826, + "learning_rate": 2.9985838672179954e-05, + "loss": 0.3554, + "step": 7204, + "teacher_loss": 0.34118831157684326 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.498772531747818, + "learning_rate": 2.9985739830709724e-05, + "loss": 0.4497, + "step": 7205, + "teacher_loss": 0.444266140460968 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.20209330320358276, + "learning_rate": 2.9985640645661414e-05, + "loss": 0.187, + "step": 7206, + "teacher_loss": 0.1853528916835785 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.4938291013240814, + "learning_rate": 2.9985541117037295e-05, + "loss": 0.3122, + "step": 7207, + "teacher_loss": 0.2919834852218628 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.31927141547203064, + "learning_rate": 2.9985441244839642e-05, + "loss": 0.2126, + "step": 7208, + "teacher_loss": 0.200755774974823 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.2732795178890228, + "learning_rate": 2.998534102907075e-05, + "loss": 0.2277, + "step": 7209, + "teacher_loss": 0.22258678078651428 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.43740200996398926, + "learning_rate": 2.998524046973292e-05, + "loss": 0.2202, + "step": 7210, + "teacher_loss": 0.19607087969779968 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.5119949579238892, + "learning_rate": 2.9985139566828457e-05, + "loss": 0.327, + "step": 7211, + "teacher_loss": 0.3064630329608917 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.6115130186080933, + "learning_rate": 2.9985038320359667e-05, + "loss": 0.2583, + "step": 7212, + "teacher_loss": 0.21901297569274902 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 1.0921207666397095, + "learning_rate": 2.9984936730328873e-05, + "loss": 0.2745, + "step": 7213, + "teacher_loss": 0.1836167722940445 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.475678026676178, + "learning_rate": 2.9984834796738412e-05, + "loss": 0.3437, + "step": 7214, + "teacher_loss": 0.3289860486984253 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 1.2674893140792847, + "learning_rate": 2.9984732519590615e-05, + "loss": 0.5025, + "step": 7215, + "teacher_loss": 0.41754835844039917 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.15699143707752228, + "learning_rate": 2.998462989888783e-05, + "loss": 0.178, + "step": 7216, + "teacher_loss": 0.18035903573036194 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.8209565281867981, + "learning_rate": 2.9984526934632402e-05, + "loss": 0.4067, + "step": 7217, + "teacher_loss": 0.36064085364341736 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.6676613092422485, + "learning_rate": 2.99844236268267e-05, + "loss": 0.3649, + "step": 7218, + "teacher_loss": 0.3312046527862549 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.4311748147010803, + "learning_rate": 2.9984319975473092e-05, + "loss": 0.3433, + "step": 7219, + "teacher_loss": 0.3335148096084595 + }, + { + "compression_loss": 0.0, + "epoch": 1.3, + "label_loss": 0.47203755378723145, + "learning_rate": 2.9984215980573947e-05, + "loss": 0.2357, + "step": 7220, + "teacher_loss": 0.20947599411010742 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.7399529814720154, + "learning_rate": 2.9984111642131662e-05, + "loss": 0.2939, + "step": 7221, + "teacher_loss": 0.2443842589855194 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.5778563618659973, + "learning_rate": 2.9984006960148616e-05, + "loss": 0.2241, + "step": 7222, + "teacher_loss": 0.18481546640396118 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.5575137138366699, + "learning_rate": 2.9983901934627222e-05, + "loss": 0.2614, + "step": 7223, + "teacher_loss": 0.2285292148590088 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.5923289060592651, + "learning_rate": 2.998379656556987e-05, + "loss": 0.3971, + "step": 7224, + "teacher_loss": 0.3754255771636963 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.2733168601989746, + "learning_rate": 2.9983690852978995e-05, + "loss": 0.2204, + "step": 7225, + "teacher_loss": 0.21456539630889893 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.43110713362693787, + "learning_rate": 2.9983584796857007e-05, + "loss": 0.2592, + "step": 7226, + "teacher_loss": 0.24008886516094208 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.4446399211883545, + "learning_rate": 2.9983478397206344e-05, + "loss": 0.2656, + "step": 7227, + "teacher_loss": 0.24568751454353333 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.4208083152770996, + "learning_rate": 2.998337165402945e-05, + "loss": 0.2599, + "step": 7228, + "teacher_loss": 0.242002934217453 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.35321855545043945, + "learning_rate": 2.9983264567328756e-05, + "loss": 0.3652, + "step": 7229, + "teacher_loss": 0.3664790391921997 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.3883362412452698, + "learning_rate": 2.9983157137106737e-05, + "loss": 0.2097, + "step": 7230, + "teacher_loss": 0.18981149792671204 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.8627563714981079, + "learning_rate": 2.998304936336584e-05, + "loss": 0.5401, + "step": 7231, + "teacher_loss": 0.5042321085929871 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.29157111048698425, + "learning_rate": 2.9982941246108543e-05, + "loss": 0.2588, + "step": 7232, + "teacher_loss": 0.25512123107910156 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.1831384301185608, + "learning_rate": 2.998283278533733e-05, + "loss": 0.1961, + "step": 7233, + "teacher_loss": 0.19749119877815247 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.8864837288856506, + "learning_rate": 2.9982723981054677e-05, + "loss": 0.3289, + "step": 7234, + "teacher_loss": 0.26696956157684326 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.3303971290588379, + "learning_rate": 2.9982614833263083e-05, + "loss": 0.247, + "step": 7235, + "teacher_loss": 0.2377208024263382 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.6651716828346252, + "learning_rate": 2.9982505341965056e-05, + "loss": 0.3817, + "step": 7236, + "teacher_loss": 0.35018348693847656 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.40689969062805176, + "learning_rate": 2.99823955071631e-05, + "loss": 0.2335, + "step": 7237, + "teacher_loss": 0.21428629755973816 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.2402096688747406, + "learning_rate": 2.9982285328859737e-05, + "loss": 0.2086, + "step": 7238, + "teacher_loss": 0.20507864654064178 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.24840183556079865, + "learning_rate": 2.9982174807057486e-05, + "loss": 0.2299, + "step": 7239, + "teacher_loss": 0.22786910831928253 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.8850395679473877, + "learning_rate": 2.9982063941758882e-05, + "loss": 0.3104, + "step": 7240, + "teacher_loss": 0.24658949673175812 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.49173837900161743, + "learning_rate": 2.9981952732966477e-05, + "loss": 0.3805, + "step": 7241, + "teacher_loss": 0.3680863380432129 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.7370061278343201, + "learning_rate": 2.998184118068281e-05, + "loss": 0.278, + "step": 7242, + "teacher_loss": 0.2270524799823761 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.23076987266540527, + "learning_rate": 2.998172928491045e-05, + "loss": 0.2173, + "step": 7243, + "teacher_loss": 0.21583566069602966 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.416591078042984, + "learning_rate": 2.9981617045651946e-05, + "loss": 0.2343, + "step": 7244, + "teacher_loss": 0.21407851576805115 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.3746960759162903, + "learning_rate": 2.9981504462909887e-05, + "loss": 0.1991, + "step": 7245, + "teacher_loss": 0.1795339584350586 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.8359163999557495, + "learning_rate": 2.998139153668684e-05, + "loss": 0.765, + "step": 7246, + "teacher_loss": 0.7571512460708618 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.6188288927078247, + "learning_rate": 2.998127826698541e-05, + "loss": 0.4419, + "step": 7247, + "teacher_loss": 0.42219096422195435 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.21094536781311035, + "learning_rate": 2.998116465380818e-05, + "loss": 0.2272, + "step": 7248, + "teacher_loss": 0.2290198802947998 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.32698947191238403, + "learning_rate": 2.9981050697157762e-05, + "loss": 0.2289, + "step": 7249, + "teacher_loss": 0.21796278655529022 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.3986563980579376, + "learning_rate": 2.9980936397036768e-05, + "loss": 0.2201, + "step": 7250, + "teacher_loss": 0.20028197765350342 + }, + { + "epoch": 1.31, + "eval_exact_match": 79.30936613055819, + "eval_f1": 86.74824375149826, + "step": 7250 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.2922189235687256, + "learning_rate": 2.9980821753447818e-05, + "loss": 0.2295, + "step": 7251, + "teacher_loss": 0.22257539629936218 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.19028058648109436, + "learning_rate": 2.998070676639354e-05, + "loss": 0.1842, + "step": 7252, + "teacher_loss": 0.18350613117218018 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.4416945278644562, + "learning_rate": 2.998059143587657e-05, + "loss": 0.2527, + "step": 7253, + "teacher_loss": 0.23173725605010986 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.9720132946968079, + "learning_rate": 2.998047576189955e-05, + "loss": 0.5182, + "step": 7254, + "teacher_loss": 0.4677361845970154 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.33549728989601135, + "learning_rate": 2.9980359744465134e-05, + "loss": 0.204, + "step": 7255, + "teacher_loss": 0.1893874704837799 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.2665591239929199, + "learning_rate": 2.9980243383575986e-05, + "loss": 0.1736, + "step": 7256, + "teacher_loss": 0.16327320039272308 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.4667154848575592, + "learning_rate": 2.998012667923477e-05, + "loss": 0.2381, + "step": 7257, + "teacher_loss": 0.21275238692760468 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.6106563806533813, + "learning_rate": 2.998000963144416e-05, + "loss": 0.3463, + "step": 7258, + "teacher_loss": 0.3169543743133545 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.48956573009490967, + "learning_rate": 2.997989224020684e-05, + "loss": 0.2344, + "step": 7259, + "teacher_loss": 0.2060491144657135 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.5121805667877197, + "learning_rate": 2.9979774505525512e-05, + "loss": 0.31, + "step": 7260, + "teacher_loss": 0.287537157535553 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.17669223248958588, + "learning_rate": 2.9979656427402857e-05, + "loss": 0.1679, + "step": 7261, + "teacher_loss": 0.1669164001941681 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.4805675148963928, + "learning_rate": 2.9979538005841594e-05, + "loss": 0.3345, + "step": 7262, + "teacher_loss": 0.3183096945285797 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.1577012836933136, + "learning_rate": 2.9979419240844435e-05, + "loss": 0.1925, + "step": 7263, + "teacher_loss": 0.19635093212127686 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.24642488360404968, + "learning_rate": 2.9979300132414105e-05, + "loss": 0.2278, + "step": 7264, + "teacher_loss": 0.2257044017314911 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.953657865524292, + "learning_rate": 2.9979180680553336e-05, + "loss": 0.3425, + "step": 7265, + "teacher_loss": 0.2746277153491974 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.2676847577095032, + "learning_rate": 2.997906088526486e-05, + "loss": 0.2003, + "step": 7266, + "teacher_loss": 0.19284990429878235 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.841678261756897, + "learning_rate": 2.9978940746551426e-05, + "loss": 0.3766, + "step": 7267, + "teacher_loss": 0.32495301961898804 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 1.0753730535507202, + "learning_rate": 2.997882026441579e-05, + "loss": 0.4057, + "step": 7268, + "teacher_loss": 0.33123910427093506 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.6366230249404907, + "learning_rate": 2.9978699438860718e-05, + "loss": 0.4198, + "step": 7269, + "teacher_loss": 0.3957279324531555 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.31219756603240967, + "learning_rate": 2.9978578269888974e-05, + "loss": 0.2013, + "step": 7270, + "teacher_loss": 0.18897521495819092 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.8002399206161499, + "learning_rate": 2.9978456757503337e-05, + "loss": 0.3853, + "step": 7271, + "teacher_loss": 0.3391510248184204 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.2662709355354309, + "learning_rate": 2.9978334901706597e-05, + "loss": 0.3012, + "step": 7272, + "teacher_loss": 0.30507731437683105 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.24473556876182556, + "learning_rate": 2.997821270250154e-05, + "loss": 0.277, + "step": 7273, + "teacher_loss": 0.28056466579437256 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.34481680393218994, + "learning_rate": 2.9978090159890984e-05, + "loss": 0.2123, + "step": 7274, + "teacher_loss": 0.19757075607776642 + }, + { + "compression_loss": 0.0, + "epoch": 1.31, + "label_loss": 0.8017687797546387, + "learning_rate": 2.997796727387772e-05, + "loss": 0.3596, + "step": 7275, + "teacher_loss": 0.31043741106987 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.3086521625518799, + "learning_rate": 2.997784404446457e-05, + "loss": 0.3249, + "step": 7276, + "teacher_loss": 0.3267475664615631 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.4343588948249817, + "learning_rate": 2.9977720471654366e-05, + "loss": 0.2874, + "step": 7277, + "teacher_loss": 0.2710815668106079 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.28277331590652466, + "learning_rate": 2.9977596555449934e-05, + "loss": 0.2012, + "step": 7278, + "teacher_loss": 0.19212418794631958 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.2767001986503601, + "learning_rate": 2.9977472295854123e-05, + "loss": 0.21, + "step": 7279, + "teacher_loss": 0.20257991552352905 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.6051468253135681, + "learning_rate": 2.9977347692869773e-05, + "loss": 0.2635, + "step": 7280, + "teacher_loss": 0.22550326585769653 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.2482650727033615, + "learning_rate": 2.997722274649974e-05, + "loss": 0.2438, + "step": 7281, + "teacher_loss": 0.24328620731830597 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.4350046217441559, + "learning_rate": 2.9977097456746904e-05, + "loss": 0.2313, + "step": 7282, + "teacher_loss": 0.2086797058582306 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.42968565225601196, + "learning_rate": 2.997697182361412e-05, + "loss": 0.2518, + "step": 7283, + "teacher_loss": 0.23206770420074463 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.2455797642469406, + "learning_rate": 2.997684584710428e-05, + "loss": 0.1611, + "step": 7284, + "teacher_loss": 0.151741161942482 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.9952998161315918, + "learning_rate": 2.9976719527220265e-05, + "loss": 0.503, + "step": 7285, + "teacher_loss": 0.44833970069885254 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.5655444264411926, + "learning_rate": 2.9976592863964975e-05, + "loss": 0.237, + "step": 7286, + "teacher_loss": 0.20048922300338745 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.25527966022491455, + "learning_rate": 2.9976465857341312e-05, + "loss": 0.1641, + "step": 7287, + "teacher_loss": 0.1540106236934662 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.8168010711669922, + "learning_rate": 2.9976338507352187e-05, + "loss": 0.3515, + "step": 7288, + "teacher_loss": 0.2997886836528778 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.8808541297912598, + "learning_rate": 2.9976210814000522e-05, + "loss": 0.2799, + "step": 7289, + "teacher_loss": 0.21310442686080933 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.09543114900588989, + "learning_rate": 2.9976082777289247e-05, + "loss": 0.1929, + "step": 7290, + "teacher_loss": 0.20374110341072083 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.5572643876075745, + "learning_rate": 2.997595439722129e-05, + "loss": 0.3074, + "step": 7291, + "teacher_loss": 0.27965646982192993 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.29949676990509033, + "learning_rate": 2.9975825673799602e-05, + "loss": 0.1926, + "step": 7292, + "teacher_loss": 0.1807679682970047 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.432328462600708, + "learning_rate": 2.997569660702713e-05, + "loss": 0.1884, + "step": 7293, + "teacher_loss": 0.1613493263721466 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.6709614992141724, + "learning_rate": 2.997556719690684e-05, + "loss": 0.3086, + "step": 7294, + "teacher_loss": 0.26828891038894653 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.9702615141868591, + "learning_rate": 2.9975437443441686e-05, + "loss": 0.2902, + "step": 7295, + "teacher_loss": 0.21461263298988342 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.8656134605407715, + "learning_rate": 2.9975307346634654e-05, + "loss": 0.2512, + "step": 7296, + "teacher_loss": 0.18290898203849792 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.5771152973175049, + "learning_rate": 2.997517690648872e-05, + "loss": 0.2408, + "step": 7297, + "teacher_loss": 0.20338109135627747 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.8216719627380371, + "learning_rate": 2.9975046123006876e-05, + "loss": 0.3716, + "step": 7298, + "teacher_loss": 0.32157260179519653 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.34991562366485596, + "learning_rate": 2.9974914996192124e-05, + "loss": 0.2098, + "step": 7299, + "teacher_loss": 0.19417712092399597 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.504607081413269, + "learning_rate": 2.997478352604747e-05, + "loss": 0.3049, + "step": 7300, + "teacher_loss": 0.2827131748199463 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.3709542155265808, + "learning_rate": 2.9974651712575925e-05, + "loss": 0.311, + "step": 7301, + "teacher_loss": 0.30433326959609985 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.4605497121810913, + "learning_rate": 2.997451955578051e-05, + "loss": 0.3476, + "step": 7302, + "teacher_loss": 0.3350525498390198 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.5222505331039429, + "learning_rate": 2.9974387055664264e-05, + "loss": 0.2586, + "step": 7303, + "teacher_loss": 0.22925019264221191 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.45401233434677124, + "learning_rate": 2.9974254212230213e-05, + "loss": 0.229, + "step": 7304, + "teacher_loss": 0.2039814591407776 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.3161039352416992, + "learning_rate": 2.997412102548141e-05, + "loss": 0.2729, + "step": 7305, + "teacher_loss": 0.26813262701034546 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.36586785316467285, + "learning_rate": 2.9973987495420903e-05, + "loss": 0.2891, + "step": 7306, + "teacher_loss": 0.280546098947525 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.310201495885849, + "learning_rate": 2.9973853622051763e-05, + "loss": 0.2356, + "step": 7307, + "teacher_loss": 0.22728517651557922 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.3546815514564514, + "learning_rate": 2.9973719405377052e-05, + "loss": 0.2363, + "step": 7308, + "teacher_loss": 0.22311818599700928 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.7373230457305908, + "learning_rate": 2.9973584845399847e-05, + "loss": 0.2632, + "step": 7309, + "teacher_loss": 0.21047137677669525 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.42884954810142517, + "learning_rate": 2.9973449942123235e-05, + "loss": 0.2186, + "step": 7310, + "teacher_loss": 0.19527886807918549 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.7588491439819336, + "learning_rate": 2.9973314695550308e-05, + "loss": 0.3227, + "step": 7311, + "teacher_loss": 0.27427947521209717 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.3305927813053131, + "learning_rate": 2.9973179105684167e-05, + "loss": 0.2645, + "step": 7312, + "teacher_loss": 0.25716230273246765 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.4629131853580475, + "learning_rate": 2.9973043172527923e-05, + "loss": 0.2808, + "step": 7313, + "teacher_loss": 0.26051461696624756 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.42065292596817017, + "learning_rate": 2.9972906896084688e-05, + "loss": 0.2061, + "step": 7314, + "teacher_loss": 0.18229080736637115 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.5817309617996216, + "learning_rate": 2.997277027635759e-05, + "loss": 0.3361, + "step": 7315, + "teacher_loss": 0.3087572455406189 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.3875213861465454, + "learning_rate": 2.9972633313349764e-05, + "loss": 0.4142, + "step": 7316, + "teacher_loss": 0.4171638488769531 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.5381307005882263, + "learning_rate": 2.9972496007064343e-05, + "loss": 0.2663, + "step": 7317, + "teacher_loss": 0.23611651360988617 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.40636610984802246, + "learning_rate": 2.997235835750448e-05, + "loss": 0.3038, + "step": 7318, + "teacher_loss": 0.2923821210861206 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.6476719379425049, + "learning_rate": 2.9972220364673327e-05, + "loss": 0.2671, + "step": 7319, + "teacher_loss": 0.2248186469078064 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.6620304584503174, + "learning_rate": 2.997208202857405e-05, + "loss": 0.2435, + "step": 7320, + "teacher_loss": 0.19694823026657104 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.7568368911743164, + "learning_rate": 2.997194334920982e-05, + "loss": 0.3799, + "step": 7321, + "teacher_loss": 0.33800676465034485 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.4141192138195038, + "learning_rate": 2.997180432658382e-05, + "loss": 0.239, + "step": 7322, + "teacher_loss": 0.21951784193515778 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.41509801149368286, + "learning_rate": 2.9971664960699234e-05, + "loss": 0.2544, + "step": 7323, + "teacher_loss": 0.23653200268745422 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.3522759974002838, + "learning_rate": 2.997152525155926e-05, + "loss": 0.3048, + "step": 7324, + "teacher_loss": 0.299490749835968 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.23051688075065613, + "learning_rate": 2.9971385199167093e-05, + "loss": 0.1851, + "step": 7325, + "teacher_loss": 0.18002855777740479 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.3328905403614044, + "learning_rate": 2.9971244803525956e-05, + "loss": 0.2161, + "step": 7326, + "teacher_loss": 0.2031579613685608 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.46655842661857605, + "learning_rate": 2.9971104064639055e-05, + "loss": 0.2304, + "step": 7327, + "teacher_loss": 0.20419305562973022 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.8703953623771667, + "learning_rate": 2.9970962982509627e-05, + "loss": 0.2976, + "step": 7328, + "teacher_loss": 0.23398801684379578 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.5596283674240112, + "learning_rate": 2.9970821557140904e-05, + "loss": 0.309, + "step": 7329, + "teacher_loss": 0.28118613362312317 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.3566111922264099, + "learning_rate": 2.9970679788536127e-05, + "loss": 0.2713, + "step": 7330, + "teacher_loss": 0.2618138790130615 + }, + { + "compression_loss": 0.0, + "epoch": 1.32, + "label_loss": 0.8700116872787476, + "learning_rate": 2.9970537676698547e-05, + "loss": 0.6352, + "step": 7331, + "teacher_loss": 0.6091054677963257 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.20053735375404358, + "learning_rate": 2.9970395221631422e-05, + "loss": 0.2262, + "step": 7332, + "teacher_loss": 0.2291019856929779 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 1.2145837545394897, + "learning_rate": 2.997025242333802e-05, + "loss": 0.3772, + "step": 7333, + "teacher_loss": 0.28420233726501465 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.49879172444343567, + "learning_rate": 2.9970109281821608e-05, + "loss": 0.3395, + "step": 7334, + "teacher_loss": 0.3218291401863098 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.45375770330429077, + "learning_rate": 2.9969965797085478e-05, + "loss": 0.2945, + "step": 7335, + "teacher_loss": 0.27675020694732666 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.39490634202957153, + "learning_rate": 2.9969821969132912e-05, + "loss": 0.2063, + "step": 7336, + "teacher_loss": 0.18534669280052185 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.6186497807502747, + "learning_rate": 2.996967779796721e-05, + "loss": 0.2496, + "step": 7337, + "teacher_loss": 0.2086172252893448 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.6019611358642578, + "learning_rate": 2.996953328359168e-05, + "loss": 0.3144, + "step": 7338, + "teacher_loss": 0.2824944853782654 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.1855420172214508, + "learning_rate": 2.9969388426009632e-05, + "loss": 0.1781, + "step": 7339, + "teacher_loss": 0.17729389667510986 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.45689040422439575, + "learning_rate": 2.9969243225224386e-05, + "loss": 0.2474, + "step": 7340, + "teacher_loss": 0.22407817840576172 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.2994353473186493, + "learning_rate": 2.9969097681239274e-05, + "loss": 0.2535, + "step": 7341, + "teacher_loss": 0.24840155243873596 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.5911794900894165, + "learning_rate": 2.9968951794057633e-05, + "loss": 0.3059, + "step": 7342, + "teacher_loss": 0.2742496728897095 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.40410545468330383, + "learning_rate": 2.9968805563682805e-05, + "loss": 0.2577, + "step": 7343, + "teacher_loss": 0.24144470691680908 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.11744584143161774, + "learning_rate": 2.9968658990118145e-05, + "loss": 0.1945, + "step": 7344, + "teacher_loss": 0.20309729874134064 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.6942311525344849, + "learning_rate": 2.9968512073367015e-05, + "loss": 0.2202, + "step": 7345, + "teacher_loss": 0.167494535446167 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.3191685378551483, + "learning_rate": 2.9968364813432774e-05, + "loss": 0.252, + "step": 7346, + "teacher_loss": 0.24453842639923096 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.46853527426719666, + "learning_rate": 2.9968217210318808e-05, + "loss": 0.2723, + "step": 7347, + "teacher_loss": 0.2505247890949249 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.635383665561676, + "learning_rate": 2.9968069264028505e-05, + "loss": 0.3027, + "step": 7348, + "teacher_loss": 0.2657429277896881 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.1864016354084015, + "learning_rate": 2.9967920974565243e-05, + "loss": 0.2322, + "step": 7349, + "teacher_loss": 0.23734194040298462 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.2718780040740967, + "learning_rate": 2.9967772341932433e-05, + "loss": 0.2764, + "step": 7350, + "teacher_loss": 0.27686163783073425 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.2989136278629303, + "learning_rate": 2.9967623366133475e-05, + "loss": 0.2113, + "step": 7351, + "teacher_loss": 0.2015347182750702 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.3854275941848755, + "learning_rate": 2.9967474047171793e-05, + "loss": 0.42, + "step": 7352, + "teacher_loss": 0.4237934947013855 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.5246375799179077, + "learning_rate": 2.9967324385050806e-05, + "loss": 0.2373, + "step": 7353, + "teacher_loss": 0.20537760853767395 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.5562995076179504, + "learning_rate": 2.9967174379773943e-05, + "loss": 0.2478, + "step": 7354, + "teacher_loss": 0.2135460078716278 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.29658836126327515, + "learning_rate": 2.9967024031344646e-05, + "loss": 0.2276, + "step": 7355, + "teacher_loss": 0.21991756558418274 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.43919995427131653, + "learning_rate": 2.996687333976636e-05, + "loss": 0.3253, + "step": 7356, + "teacher_loss": 0.31269368529319763 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.3799929916858673, + "learning_rate": 2.9966722305042544e-05, + "loss": 0.2909, + "step": 7357, + "teacher_loss": 0.2810153365135193 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.6406636238098145, + "learning_rate": 2.9966570927176653e-05, + "loss": 0.2437, + "step": 7358, + "teacher_loss": 0.19963723421096802 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.3877202272415161, + "learning_rate": 2.9966419206172167e-05, + "loss": 0.22, + "step": 7359, + "teacher_loss": 0.20136913657188416 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.7347228527069092, + "learning_rate": 2.996626714203256e-05, + "loss": 0.311, + "step": 7360, + "teacher_loss": 0.2638879418373108 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.68113774061203, + "learning_rate": 2.9966114734761318e-05, + "loss": 0.3111, + "step": 7361, + "teacher_loss": 0.269944429397583 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.4550720155239105, + "learning_rate": 2.9965961984361936e-05, + "loss": 0.3732, + "step": 7362, + "teacher_loss": 0.36410146951675415 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.4915362000465393, + "learning_rate": 2.9965808890837916e-05, + "loss": 0.2217, + "step": 7363, + "teacher_loss": 0.1917087584733963 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.6807525157928467, + "learning_rate": 2.9965655454192765e-05, + "loss": 0.3509, + "step": 7364, + "teacher_loss": 0.31422409415245056 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.42059653997421265, + "learning_rate": 2.996550167443001e-05, + "loss": 0.2058, + "step": 7365, + "teacher_loss": 0.1819540560245514 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.34519878029823303, + "learning_rate": 2.996534755155317e-05, + "loss": 0.2145, + "step": 7366, + "teacher_loss": 0.19994811713695526 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.4315633773803711, + "learning_rate": 2.9965193085565774e-05, + "loss": 0.2369, + "step": 7367, + "teacher_loss": 0.2152448296546936 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.6711968183517456, + "learning_rate": 2.996503827647137e-05, + "loss": 0.4459, + "step": 7368, + "teacher_loss": 0.42087170481681824 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.40061306953430176, + "learning_rate": 2.9964883124273508e-05, + "loss": 0.3204, + "step": 7369, + "teacher_loss": 0.31147223711013794 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.6667156219482422, + "learning_rate": 2.996472762897574e-05, + "loss": 0.3206, + "step": 7370, + "teacher_loss": 0.28214824199676514 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.4160667061805725, + "learning_rate": 2.996457179058164e-05, + "loss": 0.2453, + "step": 7371, + "teacher_loss": 0.2263103425502777 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.34996652603149414, + "learning_rate": 2.9964415609094767e-05, + "loss": 0.2354, + "step": 7372, + "teacher_loss": 0.2226373255252838 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.6272486448287964, + "learning_rate": 2.9964259084518718e-05, + "loss": 0.472, + "step": 7373, + "teacher_loss": 0.45476824045181274 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.2410384863615036, + "learning_rate": 2.9964102216857062e-05, + "loss": 0.2394, + "step": 7374, + "teacher_loss": 0.23927034437656403 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.22640088200569153, + "learning_rate": 2.9963945006113416e-05, + "loss": 0.144, + "step": 7375, + "teacher_loss": 0.13487055897712708 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.2411831021308899, + "learning_rate": 2.9963787452291376e-05, + "loss": 0.2467, + "step": 7376, + "teacher_loss": 0.24735099077224731 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.4106539487838745, + "learning_rate": 2.9963629555394548e-05, + "loss": 0.2563, + "step": 7377, + "teacher_loss": 0.23909598588943481 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.5019468665122986, + "learning_rate": 2.9963471315426558e-05, + "loss": 0.4787, + "step": 7378, + "teacher_loss": 0.4760851562023163 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.7579314708709717, + "learning_rate": 2.9963312732391037e-05, + "loss": 0.3872, + "step": 7379, + "teacher_loss": 0.3459699749946594 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 1.0692360401153564, + "learning_rate": 2.9963153806291617e-05, + "loss": 0.4457, + "step": 7380, + "teacher_loss": 0.37641799449920654 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.46559929847717285, + "learning_rate": 2.996299453713194e-05, + "loss": 0.2553, + "step": 7381, + "teacher_loss": 0.23196925222873688 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.5425081253051758, + "learning_rate": 2.9962834924915662e-05, + "loss": 0.4174, + "step": 7382, + "teacher_loss": 0.4035385847091675 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.6741442680358887, + "learning_rate": 2.996267496964644e-05, + "loss": 0.2465, + "step": 7383, + "teacher_loss": 0.19900484383106232 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.2983776926994324, + "learning_rate": 2.996251467132794e-05, + "loss": 0.2445, + "step": 7384, + "teacher_loss": 0.2384839653968811 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.4212455749511719, + "learning_rate": 2.9962354029963835e-05, + "loss": 0.3394, + "step": 7385, + "teacher_loss": 0.330324649810791 + }, + { + "compression_loss": 0.0, + "epoch": 1.33, + "label_loss": 0.17655718326568604, + "learning_rate": 2.9962193045557816e-05, + "loss": 0.2044, + "step": 7386, + "teacher_loss": 0.2074650377035141 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.24185813963413239, + "learning_rate": 2.996203171811357e-05, + "loss": 0.2186, + "step": 7387, + "teacher_loss": 0.21602776646614075 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.45158565044403076, + "learning_rate": 2.9961870047634795e-05, + "loss": 0.2737, + "step": 7388, + "teacher_loss": 0.25394681096076965 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.4181064963340759, + "learning_rate": 2.9961708034125196e-05, + "loss": 0.2393, + "step": 7389, + "teacher_loss": 0.21941301226615906 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.2609187364578247, + "learning_rate": 2.996154567758849e-05, + "loss": 0.4262, + "step": 7390, + "teacher_loss": 0.4446113109588623 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.44070518016815186, + "learning_rate": 2.99613829780284e-05, + "loss": 0.2185, + "step": 7391, + "teacher_loss": 0.1938561648130417 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.2816624045372009, + "learning_rate": 2.996121993544865e-05, + "loss": 0.2154, + "step": 7392, + "teacher_loss": 0.20804142951965332 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.4211142361164093, + "learning_rate": 2.996105654985299e-05, + "loss": 0.2525, + "step": 7393, + "teacher_loss": 0.23376566171646118 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.3629847466945648, + "learning_rate": 2.9960892821245152e-05, + "loss": 0.2108, + "step": 7394, + "teacher_loss": 0.1938496232032776 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.09167563915252686, + "learning_rate": 2.99607287496289e-05, + "loss": 0.2099, + "step": 7395, + "teacher_loss": 0.22303424775600433 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.6031914353370667, + "learning_rate": 2.9960564335007996e-05, + "loss": 0.279, + "step": 7396, + "teacher_loss": 0.24294881522655487 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.5824425220489502, + "learning_rate": 2.99603995773862e-05, + "loss": 0.311, + "step": 7397, + "teacher_loss": 0.28085461258888245 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.4841139316558838, + "learning_rate": 2.99602344767673e-05, + "loss": 0.2139, + "step": 7398, + "teacher_loss": 0.183875173330307 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.15167561173439026, + "learning_rate": 2.9960069033155072e-05, + "loss": 0.1765, + "step": 7399, + "teacher_loss": 0.17922484874725342 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.8382759094238281, + "learning_rate": 2.9959903246553316e-05, + "loss": 0.3067, + "step": 7400, + "teacher_loss": 0.2476838082075119 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.45282816886901855, + "learning_rate": 2.995973711696583e-05, + "loss": 0.3477, + "step": 7401, + "teacher_loss": 0.335995078086853 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.4538745582103729, + "learning_rate": 2.9959570644396423e-05, + "loss": 0.2624, + "step": 7402, + "teacher_loss": 0.24115785956382751 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.542221188545227, + "learning_rate": 2.9959403828848916e-05, + "loss": 0.2999, + "step": 7403, + "teacher_loss": 0.2729555368423462 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.13862469792366028, + "learning_rate": 2.9959236670327127e-05, + "loss": 0.1789, + "step": 7404, + "teacher_loss": 0.1833406686782837 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.22919392585754395, + "learning_rate": 2.995906916883489e-05, + "loss": 0.1772, + "step": 7405, + "teacher_loss": 0.17146193981170654 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.3760446012020111, + "learning_rate": 2.9958901324376046e-05, + "loss": 0.1895, + "step": 7406, + "teacher_loss": 0.16875171661376953 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.2690447270870209, + "learning_rate": 2.9958733136954452e-05, + "loss": 0.1986, + "step": 7407, + "teacher_loss": 0.19072780013084412 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.4795798063278198, + "learning_rate": 2.9958564606573947e-05, + "loss": 0.2302, + "step": 7408, + "teacher_loss": 0.20250046253204346 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.2868333160877228, + "learning_rate": 2.9958395733238405e-05, + "loss": 0.2491, + "step": 7409, + "teacher_loss": 0.2449251115322113 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.2213318645954132, + "learning_rate": 2.9958226516951697e-05, + "loss": 0.2565, + "step": 7410, + "teacher_loss": 0.26037633419036865 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.4070155918598175, + "learning_rate": 2.99580569577177e-05, + "loss": 0.3134, + "step": 7411, + "teacher_loss": 0.3030399680137634 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.39761602878570557, + "learning_rate": 2.9957887055540307e-05, + "loss": 0.2598, + "step": 7412, + "teacher_loss": 0.24451839923858643 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.54997718334198, + "learning_rate": 2.995771681042341e-05, + "loss": 0.3066, + "step": 7413, + "teacher_loss": 0.27954599261283875 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.2787015438079834, + "learning_rate": 2.9957546222370914e-05, + "loss": 0.3053, + "step": 7414, + "teacher_loss": 0.30828937888145447 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.48375430703163147, + "learning_rate": 2.9957375291386727e-05, + "loss": 0.2591, + "step": 7415, + "teacher_loss": 0.2340962141752243 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.2840811312198639, + "learning_rate": 2.9957204017474767e-05, + "loss": 0.2304, + "step": 7416, + "teacher_loss": 0.22444060444831848 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.18929371237754822, + "learning_rate": 2.995703240063896e-05, + "loss": 0.1423, + "step": 7417, + "teacher_loss": 0.13702404499053955 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 1.0329270362854004, + "learning_rate": 2.995686044088325e-05, + "loss": 0.4708, + "step": 7418, + "teacher_loss": 0.40832197666168213 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.2621626555919647, + "learning_rate": 2.9956688138211567e-05, + "loss": 0.2658, + "step": 7419, + "teacher_loss": 0.26625746488571167 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.5005480647087097, + "learning_rate": 2.995651549262787e-05, + "loss": 0.3065, + "step": 7420, + "teacher_loss": 0.2849048972129822 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.3439386188983917, + "learning_rate": 2.995634250413612e-05, + "loss": 0.2486, + "step": 7421, + "teacher_loss": 0.23796644806861877 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.2944409251213074, + "learning_rate": 2.995616917274027e-05, + "loss": 0.1968, + "step": 7422, + "teacher_loss": 0.185902401804924 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 1.0446711778640747, + "learning_rate": 2.9955995498444306e-05, + "loss": 0.4179, + "step": 7423, + "teacher_loss": 0.3482702970504761 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 1.3766136169433594, + "learning_rate": 2.99558214812522e-05, + "loss": 0.347, + "step": 7424, + "teacher_loss": 0.23261412978172302 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.3324040472507477, + "learning_rate": 2.9955647121167955e-05, + "loss": 0.2165, + "step": 7425, + "teacher_loss": 0.20363083481788635 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.2738818824291229, + "learning_rate": 2.9955472418195556e-05, + "loss": 0.3266, + "step": 7426, + "teacher_loss": 0.33250826597213745 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.35307320952415466, + "learning_rate": 2.9955297372339017e-05, + "loss": 0.2694, + "step": 7427, + "teacher_loss": 0.260053813457489 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 1.2614067792892456, + "learning_rate": 2.9955121983602344e-05, + "loss": 0.3985, + "step": 7428, + "teacher_loss": 0.30260366201400757 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.36976194381713867, + "learning_rate": 2.9954946251989563e-05, + "loss": 0.2484, + "step": 7429, + "teacher_loss": 0.23487775027751923 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.45760565996170044, + "learning_rate": 2.99547701775047e-05, + "loss": 0.261, + "step": 7430, + "teacher_loss": 0.23920223116874695 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.06537441164255142, + "learning_rate": 2.99545937601518e-05, + "loss": 0.1265, + "step": 7431, + "teacher_loss": 0.13328242301940918 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.3329549729824066, + "learning_rate": 2.9954416999934896e-05, + "loss": 0.3632, + "step": 7432, + "teacher_loss": 0.3666110634803772 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.29801440238952637, + "learning_rate": 2.9954239896858043e-05, + "loss": 0.4, + "step": 7433, + "teacher_loss": 0.4113055467605591 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.7839879989624023, + "learning_rate": 2.995406245092531e-05, + "loss": 0.3947, + "step": 7434, + "teacher_loss": 0.3514120876789093 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.38176602125167847, + "learning_rate": 2.9953884662140757e-05, + "loss": 0.2281, + "step": 7435, + "teacher_loss": 0.21097299456596375 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.34904783964157104, + "learning_rate": 2.9953706530508465e-05, + "loss": 0.2197, + "step": 7436, + "teacher_loss": 0.20534396171569824 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.24289575219154358, + "learning_rate": 2.9953528056032514e-05, + "loss": 0.2286, + "step": 7437, + "teacher_loss": 0.2269619107246399 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.2839312255382538, + "learning_rate": 2.9953349238716996e-05, + "loss": 0.2322, + "step": 7438, + "teacher_loss": 0.2263968288898468 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.542436957359314, + "learning_rate": 2.9953170078566014e-05, + "loss": 0.3226, + "step": 7439, + "teacher_loss": 0.2981450855731964 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.47624945640563965, + "learning_rate": 2.995299057558367e-05, + "loss": 0.3339, + "step": 7440, + "teacher_loss": 0.31803613901138306 + }, + { + "compression_loss": 0.0, + "epoch": 1.34, + "label_loss": 0.6108731031417847, + "learning_rate": 2.995281072977409e-05, + "loss": 0.3166, + "step": 7441, + "teacher_loss": 0.2839125394821167 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.33732280135154724, + "learning_rate": 2.995263054114139e-05, + "loss": 0.2568, + "step": 7442, + "teacher_loss": 0.24781520664691925 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.13159143924713135, + "learning_rate": 2.99524500096897e-05, + "loss": 0.1544, + "step": 7443, + "teacher_loss": 0.1569531410932541 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.33022165298461914, + "learning_rate": 2.995226913542316e-05, + "loss": 0.216, + "step": 7444, + "teacher_loss": 0.20328685641288757 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.2415725588798523, + "learning_rate": 2.995208791834592e-05, + "loss": 0.2788, + "step": 7445, + "teacher_loss": 0.282951295375824 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 1.1612355709075928, + "learning_rate": 2.995190635846213e-05, + "loss": 0.3122, + "step": 7446, + "teacher_loss": 0.21784768998622894 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.571759045124054, + "learning_rate": 2.9951724455775963e-05, + "loss": 0.2685, + "step": 7447, + "teacher_loss": 0.23475387692451477 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.2011307030916214, + "learning_rate": 2.995154221029157e-05, + "loss": 0.235, + "step": 7448, + "teacher_loss": 0.23872415721416473 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.3539904057979584, + "learning_rate": 2.995135962201315e-05, + "loss": 0.2878, + "step": 7449, + "teacher_loss": 0.28048595786094666 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.3041117489337921, + "learning_rate": 2.9951176690944877e-05, + "loss": 0.3779, + "step": 7450, + "teacher_loss": 0.38605183362960815 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.32429298758506775, + "learning_rate": 2.995099341709095e-05, + "loss": 0.2519, + "step": 7451, + "teacher_loss": 0.24389860033988953 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.48834770917892456, + "learning_rate": 2.9950809800455567e-05, + "loss": 0.3076, + "step": 7452, + "teacher_loss": 0.28749576210975647 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.5275222063064575, + "learning_rate": 2.9950625841042943e-05, + "loss": 0.3044, + "step": 7453, + "teacher_loss": 0.27961310744285583 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.5923051834106445, + "learning_rate": 2.995044153885729e-05, + "loss": 0.3115, + "step": 7454, + "teacher_loss": 0.2802680730819702 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.42109596729278564, + "learning_rate": 2.995025689390284e-05, + "loss": 0.3157, + "step": 7455, + "teacher_loss": 0.3039645552635193 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.367237389087677, + "learning_rate": 2.995007190618382e-05, + "loss": 0.2408, + "step": 7456, + "teacher_loss": 0.2267540991306305 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.5828267335891724, + "learning_rate": 2.9949886575704477e-05, + "loss": 0.2683, + "step": 7457, + "teacher_loss": 0.23337845504283905 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.4457915127277374, + "learning_rate": 2.994970090246905e-05, + "loss": 0.5, + "step": 7458, + "teacher_loss": 0.506027102470398 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.3433298170566559, + "learning_rate": 2.994951488648181e-05, + "loss": 0.3042, + "step": 7459, + "teacher_loss": 0.29985344409942627 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.29525110125541687, + "learning_rate": 2.994932852774701e-05, + "loss": 0.3455, + "step": 7460, + "teacher_loss": 0.3510931134223938 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.19486252963542938, + "learning_rate": 2.9949141826268927e-05, + "loss": 0.1768, + "step": 7461, + "teacher_loss": 0.17478960752487183 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.3592166602611542, + "learning_rate": 2.9948954782051847e-05, + "loss": 0.2443, + "step": 7462, + "teacher_loss": 0.2314939796924591 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.5024241209030151, + "learning_rate": 2.994876739510005e-05, + "loss": 0.5241, + "step": 7463, + "teacher_loss": 0.5264977812767029 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.628815770149231, + "learning_rate": 2.9948579665417834e-05, + "loss": 0.4483, + "step": 7464, + "teacher_loss": 0.4282122552394867 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.3290923237800598, + "learning_rate": 2.9948391593009506e-05, + "loss": 0.164, + "step": 7465, + "teacher_loss": 0.1456906944513321 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.7108486294746399, + "learning_rate": 2.9948203177879372e-05, + "loss": 0.6942, + "step": 7466, + "teacher_loss": 0.6923169493675232 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.6858711242675781, + "learning_rate": 2.9948014420031763e-05, + "loss": 0.4079, + "step": 7467, + "teacher_loss": 0.3769756257534027 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.4692775011062622, + "learning_rate": 2.9947825319471e-05, + "loss": 0.1971, + "step": 7468, + "teacher_loss": 0.1669057011604309 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.434905469417572, + "learning_rate": 2.9947635876201415e-05, + "loss": 0.2707, + "step": 7469, + "teacher_loss": 0.2524486184120178 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.2583503723144531, + "learning_rate": 2.9947446090227352e-05, + "loss": 0.2059, + "step": 7470, + "teacher_loss": 0.20002153515815735 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.41995954513549805, + "learning_rate": 2.9947255961553164e-05, + "loss": 0.31, + "step": 7471, + "teacher_loss": 0.2977794110774994 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.3154115676879883, + "learning_rate": 2.994706549018322e-05, + "loss": 0.2359, + "step": 7472, + "teacher_loss": 0.22709864377975464 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.4403138756752014, + "learning_rate": 2.994687467612187e-05, + "loss": 0.2506, + "step": 7473, + "teacher_loss": 0.22953663766384125 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.6175891160964966, + "learning_rate": 2.99466835193735e-05, + "loss": 0.242, + "step": 7474, + "teacher_loss": 0.20021606981754303 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.4916706681251526, + "learning_rate": 2.994649201994249e-05, + "loss": 0.438, + "step": 7475, + "teacher_loss": 0.4320324957370758 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 1.0189846754074097, + "learning_rate": 2.994630017783323e-05, + "loss": 0.4673, + "step": 7476, + "teacher_loss": 0.40599769353866577 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.1506902277469635, + "learning_rate": 2.9946107993050115e-05, + "loss": 0.1571, + "step": 7477, + "teacher_loss": 0.15786322951316833 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.10593598335981369, + "learning_rate": 2.9945915465597557e-05, + "loss": 0.1812, + "step": 7478, + "teacher_loss": 0.18961024284362793 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.46019983291625977, + "learning_rate": 2.9945722595479965e-05, + "loss": 0.2845, + "step": 7479, + "teacher_loss": 0.2649534046649933 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.6787864565849304, + "learning_rate": 2.994552938270177e-05, + "loss": 0.2742, + "step": 7480, + "teacher_loss": 0.22929373383522034 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.34946414828300476, + "learning_rate": 2.9945335827267388e-05, + "loss": 0.2683, + "step": 7481, + "teacher_loss": 0.2592858076095581 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 1.390134334564209, + "learning_rate": 2.9945141929181267e-05, + "loss": 0.3425, + "step": 7482, + "teacher_loss": 0.2260952889919281 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.5059945583343506, + "learning_rate": 2.994494768844785e-05, + "loss": 0.3265, + "step": 7483, + "teacher_loss": 0.3065948486328125 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.8946864604949951, + "learning_rate": 2.9944753105071587e-05, + "loss": 0.4428, + "step": 7484, + "teacher_loss": 0.3925560712814331 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.3887980878353119, + "learning_rate": 2.9944558179056944e-05, + "loss": 0.1781, + "step": 7485, + "teacher_loss": 0.15467095375061035 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.4459196627140045, + "learning_rate": 2.9944362910408393e-05, + "loss": 0.2961, + "step": 7486, + "teacher_loss": 0.27947306632995605 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.5475760698318481, + "learning_rate": 2.9944167299130397e-05, + "loss": 0.2545, + "step": 7487, + "teacher_loss": 0.22190696001052856 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.3333711624145508, + "learning_rate": 2.9943971345227452e-05, + "loss": 0.2768, + "step": 7488, + "teacher_loss": 0.27047520875930786 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.3368968963623047, + "learning_rate": 2.9943775048704053e-05, + "loss": 0.2132, + "step": 7489, + "teacher_loss": 0.1994766891002655 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.1833430528640747, + "learning_rate": 2.994357840956469e-05, + "loss": 0.1789, + "step": 7490, + "teacher_loss": 0.17837193608283997 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.3790837228298187, + "learning_rate": 2.9943381427813882e-05, + "loss": 0.2324, + "step": 7491, + "teacher_loss": 0.21615347266197205 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.5838605165481567, + "learning_rate": 2.994318410345614e-05, + "loss": 0.3608, + "step": 7492, + "teacher_loss": 0.3359782099723816 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.2583012878894806, + "learning_rate": 2.9942986436495986e-05, + "loss": 0.2216, + "step": 7493, + "teacher_loss": 0.2175406515598297 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 1.3319706916809082, + "learning_rate": 2.9942788426937956e-05, + "loss": 0.381, + "step": 7494, + "teacher_loss": 0.2753249406814575 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.36045414209365845, + "learning_rate": 2.9942590074786588e-05, + "loss": 0.2097, + "step": 7495, + "teacher_loss": 0.19290709495544434 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.25065258145332336, + "learning_rate": 2.9942391380046433e-05, + "loss": 0.2755, + "step": 7496, + "teacher_loss": 0.2783011198043823 + }, + { + "compression_loss": 0.0, + "epoch": 1.35, + "label_loss": 0.6273810863494873, + "learning_rate": 2.9942192342722037e-05, + "loss": 0.3676, + "step": 7497, + "teacher_loss": 0.3387652039527893 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.4858560860157013, + "learning_rate": 2.994199296281797e-05, + "loss": 0.2166, + "step": 7498, + "teacher_loss": 0.18671831488609314 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.3831263780593872, + "learning_rate": 2.994179324033881e-05, + "loss": 0.2803, + "step": 7499, + "teacher_loss": 0.26887187361717224 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.7703880071640015, + "learning_rate": 2.994159317528912e-05, + "loss": 0.2757, + "step": 7500, + "teacher_loss": 0.22071754932403564 + }, + { + "epoch": 1.36, + "eval_exact_match": 78.90255439924314, + "eval_f1": 86.43267132876123, + "step": 7500 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.8324979543685913, + "learning_rate": 2.99413927676735e-05, + "loss": 0.3098, + "step": 7501, + "teacher_loss": 0.25171077251434326 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.6357442736625671, + "learning_rate": 2.9941192017496545e-05, + "loss": 0.3712, + "step": 7502, + "teacher_loss": 0.34180718660354614 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.4425898790359497, + "learning_rate": 2.9940990924762846e-05, + "loss": 0.2492, + "step": 7503, + "teacher_loss": 0.22773519158363342 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.7362959980964661, + "learning_rate": 2.994078948947702e-05, + "loss": 0.4452, + "step": 7504, + "teacher_loss": 0.412894070148468 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.5426827669143677, + "learning_rate": 2.9940587711643693e-05, + "loss": 0.2662, + "step": 7505, + "teacher_loss": 0.23547953367233276 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.3502090573310852, + "learning_rate": 2.9940385591267477e-05, + "loss": 0.1905, + "step": 7506, + "teacher_loss": 0.17271116375923157 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.6637804508209229, + "learning_rate": 2.9940183128353015e-05, + "loss": 0.2245, + "step": 7507, + "teacher_loss": 0.17564037442207336 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.23844844102859497, + "learning_rate": 2.9939980322904948e-05, + "loss": 0.2011, + "step": 7508, + "teacher_loss": 0.19697964191436768 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.5452797412872314, + "learning_rate": 2.9939777174927924e-05, + "loss": 0.2766, + "step": 7509, + "teacher_loss": 0.2467808723449707 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.4377099275588989, + "learning_rate": 2.9939573684426603e-05, + "loss": 0.3167, + "step": 7510, + "teacher_loss": 0.3032767176628113 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.41947972774505615, + "learning_rate": 2.9939369851405645e-05, + "loss": 0.236, + "step": 7511, + "teacher_loss": 0.21561065316200256 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.2861916422843933, + "learning_rate": 2.993916567586973e-05, + "loss": 0.1732, + "step": 7512, + "teacher_loss": 0.16060954332351685 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.4824354648590088, + "learning_rate": 2.993896115782353e-05, + "loss": 0.2535, + "step": 7513, + "teacher_loss": 0.22809892892837524 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.2786598801612854, + "learning_rate": 2.993875629727175e-05, + "loss": 0.2425, + "step": 7514, + "teacher_loss": 0.23850062489509583 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.20274555683135986, + "learning_rate": 2.993855109421907e-05, + "loss": 0.1747, + "step": 7515, + "teacher_loss": 0.17157389223575592 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.29919224977493286, + "learning_rate": 2.99383455486702e-05, + "loss": 0.275, + "step": 7516, + "teacher_loss": 0.27230942249298096 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.8254399299621582, + "learning_rate": 2.9938139660629863e-05, + "loss": 0.6489, + "step": 7517, + "teacher_loss": 0.6292467713356018 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.32526054978370667, + "learning_rate": 2.9937933430102758e-05, + "loss": 0.2691, + "step": 7518, + "teacher_loss": 0.26285964250564575 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.5519172549247742, + "learning_rate": 2.9937726857093636e-05, + "loss": 0.3313, + "step": 7519, + "teacher_loss": 0.3067835867404938 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.46934181451797485, + "learning_rate": 2.9937519941607216e-05, + "loss": 0.2677, + "step": 7520, + "teacher_loss": 0.2452666163444519 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.46599280834198, + "learning_rate": 2.9937312683648256e-05, + "loss": 0.2209, + "step": 7521, + "teacher_loss": 0.19371947646141052 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.4714832305908203, + "learning_rate": 2.993710508322149e-05, + "loss": 0.268, + "step": 7522, + "teacher_loss": 0.24534301459789276 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.5620614886283875, + "learning_rate": 2.99368971403317e-05, + "loss": 0.2017, + "step": 7523, + "teacher_loss": 0.16170698404312134 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.4059962034225464, + "learning_rate": 2.9936688854983637e-05, + "loss": 0.2042, + "step": 7524, + "teacher_loss": 0.1817486435174942 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.5865641832351685, + "learning_rate": 2.993648022718208e-05, + "loss": 0.2988, + "step": 7525, + "teacher_loss": 0.2668393552303314 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.5302565693855286, + "learning_rate": 2.9936271256931812e-05, + "loss": 0.3135, + "step": 7526, + "teacher_loss": 0.2894470691680908 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.6800379753112793, + "learning_rate": 2.9936061944237628e-05, + "loss": 0.2749, + "step": 7527, + "teacher_loss": 0.22983799874782562 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.3122449517250061, + "learning_rate": 2.993585228910432e-05, + "loss": 0.2216, + "step": 7528, + "teacher_loss": 0.21155352890491486 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 1.0097167491912842, + "learning_rate": 2.9935642291536706e-05, + "loss": 0.6414, + "step": 7529, + "teacher_loss": 0.6004397869110107 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.6385592222213745, + "learning_rate": 2.9935431951539584e-05, + "loss": 0.4158, + "step": 7530, + "teacher_loss": 0.39110374450683594 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.6485559940338135, + "learning_rate": 2.9935221269117795e-05, + "loss": 0.3553, + "step": 7531, + "teacher_loss": 0.3227570652961731 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.40885916352272034, + "learning_rate": 2.9935010244276155e-05, + "loss": 0.2703, + "step": 7532, + "teacher_loss": 0.2549360692501068 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.321699321269989, + "learning_rate": 2.993479887701951e-05, + "loss": 0.225, + "step": 7533, + "teacher_loss": 0.2142285704612732 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.5968490839004517, + "learning_rate": 2.9934587167352708e-05, + "loss": 0.2061, + "step": 7534, + "teacher_loss": 0.162733793258667 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.3200331926345825, + "learning_rate": 2.9934375115280592e-05, + "loss": 0.2469, + "step": 7535, + "teacher_loss": 0.23878273367881775 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.4204094409942627, + "learning_rate": 2.9934162720808028e-05, + "loss": 0.2404, + "step": 7536, + "teacher_loss": 0.22043679654598236 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.28288891911506653, + "learning_rate": 2.9933949983939894e-05, + "loss": 0.1838, + "step": 7537, + "teacher_loss": 0.17274974286556244 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.6203982830047607, + "learning_rate": 2.993373690468106e-05, + "loss": 0.4273, + "step": 7538, + "teacher_loss": 0.4058406352996826 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.4666936695575714, + "learning_rate": 2.9933523483036415e-05, + "loss": 0.263, + "step": 7539, + "teacher_loss": 0.24040162563323975 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.5362464189529419, + "learning_rate": 2.9933309719010844e-05, + "loss": 0.2862, + "step": 7540, + "teacher_loss": 0.2584337890148163 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.23154741525650024, + "learning_rate": 2.9933095612609253e-05, + "loss": 0.2699, + "step": 7541, + "teacher_loss": 0.2742026746273041 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.2427349090576172, + "learning_rate": 2.9932881163836556e-05, + "loss": 0.1988, + "step": 7542, + "teacher_loss": 0.1939554512500763 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.18827080726623535, + "learning_rate": 2.9932666372697664e-05, + "loss": 0.2117, + "step": 7543, + "teacher_loss": 0.21433806419372559 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.6037752628326416, + "learning_rate": 2.9932451239197498e-05, + "loss": 0.2735, + "step": 7544, + "teacher_loss": 0.2367558777332306 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.6233841180801392, + "learning_rate": 2.9932235763340997e-05, + "loss": 0.2895, + "step": 7545, + "teacher_loss": 0.25234729051589966 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.5477994680404663, + "learning_rate": 2.9932019945133104e-05, + "loss": 0.2491, + "step": 7546, + "teacher_loss": 0.215952530503273 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.4526468813419342, + "learning_rate": 2.9931803784578757e-05, + "loss": 0.2289, + "step": 7547, + "teacher_loss": 0.20406216382980347 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.312862366437912, + "learning_rate": 2.993158728168292e-05, + "loss": 0.3075, + "step": 7548, + "teacher_loss": 0.30693647265434265 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.7838975787162781, + "learning_rate": 2.9931370436450552e-05, + "loss": 0.3074, + "step": 7549, + "teacher_loss": 0.2545110583305359 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.8435627222061157, + "learning_rate": 2.993115324888663e-05, + "loss": 0.3417, + "step": 7550, + "teacher_loss": 0.2859613299369812 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.588554859161377, + "learning_rate": 2.9930935718996122e-05, + "loss": 0.2999, + "step": 7551, + "teacher_loss": 0.2677900195121765 + }, + { + "compression_loss": 0.0, + "epoch": 1.36, + "label_loss": 0.36412879824638367, + "learning_rate": 2.993071784678403e-05, + "loss": 0.3387, + "step": 7552, + "teacher_loss": 0.3358222544193268 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.46777772903442383, + "learning_rate": 2.993049963225534e-05, + "loss": 0.3756, + "step": 7553, + "teacher_loss": 0.3653554916381836 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 1.0213065147399902, + "learning_rate": 2.993028107541506e-05, + "loss": 0.7713, + "step": 7554, + "teacher_loss": 0.7435516119003296 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.607062578201294, + "learning_rate": 2.9930062176268196e-05, + "loss": 0.3089, + "step": 7555, + "teacher_loss": 0.2757790982723236 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.3792635202407837, + "learning_rate": 2.9929842934819768e-05, + "loss": 0.2933, + "step": 7556, + "teacher_loss": 0.28380149602890015 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.3142491281032562, + "learning_rate": 2.9929623351074806e-05, + "loss": 0.2243, + "step": 7557, + "teacher_loss": 0.21432015299797058 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.4454195499420166, + "learning_rate": 2.9929403425038338e-05, + "loss": 0.2411, + "step": 7558, + "teacher_loss": 0.2183436155319214 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.46095946431159973, + "learning_rate": 2.992918315671541e-05, + "loss": 0.2912, + "step": 7559, + "teacher_loss": 0.27229368686676025 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.4410061240196228, + "learning_rate": 2.992896254611108e-05, + "loss": 0.3261, + "step": 7560, + "teacher_loss": 0.31338778138160706 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.9786903262138367, + "learning_rate": 2.9928741593230393e-05, + "loss": 0.3817, + "step": 7561, + "teacher_loss": 0.31538426876068115 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.3916565775871277, + "learning_rate": 2.9928520298078417e-05, + "loss": 0.2544, + "step": 7562, + "teacher_loss": 0.23915785551071167 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.228489950299263, + "learning_rate": 2.992829866066023e-05, + "loss": 0.2791, + "step": 7563, + "teacher_loss": 0.2847137451171875 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.3630978465080261, + "learning_rate": 2.9928076680980917e-05, + "loss": 0.1845, + "step": 7564, + "teacher_loss": 0.16466104984283447 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.5929465293884277, + "learning_rate": 2.992785435904556e-05, + "loss": 0.2402, + "step": 7565, + "teacher_loss": 0.2010573446750641 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.6393760442733765, + "learning_rate": 2.9927631694859256e-05, + "loss": 0.3202, + "step": 7566, + "teacher_loss": 0.2847674787044525 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.26598042249679565, + "learning_rate": 2.9927408688427115e-05, + "loss": 0.2954, + "step": 7567, + "teacher_loss": 0.29863959550857544 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.5515957474708557, + "learning_rate": 2.9927185339754245e-05, + "loss": 0.3485, + "step": 7568, + "teacher_loss": 0.3259860873222351 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.6076855659484863, + "learning_rate": 2.9926961648845774e-05, + "loss": 0.3017, + "step": 7569, + "teacher_loss": 0.26764625310897827 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.571256160736084, + "learning_rate": 2.9926737615706823e-05, + "loss": 0.2456, + "step": 7570, + "teacher_loss": 0.20946840941905975 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.34154656529426575, + "learning_rate": 2.9926513240342527e-05, + "loss": 0.3439, + "step": 7571, + "teacher_loss": 0.34421372413635254 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.5341852307319641, + "learning_rate": 2.992628852275804e-05, + "loss": 0.4697, + "step": 7572, + "teacher_loss": 0.4625610113143921 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.21058453619480133, + "learning_rate": 2.992606346295851e-05, + "loss": 0.2269, + "step": 7573, + "teacher_loss": 0.22871890664100647 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.5513201951980591, + "learning_rate": 2.9925838060949087e-05, + "loss": 0.3563, + "step": 7574, + "teacher_loss": 0.33462658524513245 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.49390077590942383, + "learning_rate": 2.9925612316734957e-05, + "loss": 0.3447, + "step": 7575, + "teacher_loss": 0.32809942960739136 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.4184456467628479, + "learning_rate": 2.992538623032128e-05, + "loss": 0.2226, + "step": 7576, + "teacher_loss": 0.20087221264839172 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.5866072177886963, + "learning_rate": 2.9925159801713243e-05, + "loss": 0.4075, + "step": 7577, + "teacher_loss": 0.3876102864742279 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.3299693763256073, + "learning_rate": 2.9924933030916044e-05, + "loss": 0.2544, + "step": 7578, + "teacher_loss": 0.2460080236196518 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.597364068031311, + "learning_rate": 2.992470591793488e-05, + "loss": 0.2622, + "step": 7579, + "teacher_loss": 0.22494718432426453 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.9301906228065491, + "learning_rate": 2.992447846277495e-05, + "loss": 0.8035, + "step": 7580, + "teacher_loss": 0.789463996887207 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.44339799880981445, + "learning_rate": 2.9924250665441478e-05, + "loss": 0.4921, + "step": 7581, + "teacher_loss": 0.4974651634693146 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.3032291531562805, + "learning_rate": 2.9924022525939684e-05, + "loss": 0.216, + "step": 7582, + "teacher_loss": 0.20626375079154968 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.7703602313995361, + "learning_rate": 2.99237940442748e-05, + "loss": 0.3301, + "step": 7583, + "teacher_loss": 0.28113555908203125 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.21320851147174835, + "learning_rate": 2.9923565220452058e-05, + "loss": 0.3109, + "step": 7584, + "teacher_loss": 0.3217393159866333 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.36754024028778076, + "learning_rate": 2.9923336054476708e-05, + "loss": 0.2144, + "step": 7585, + "teacher_loss": 0.19742700457572937 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.32668545842170715, + "learning_rate": 2.992310654635401e-05, + "loss": 0.2631, + "step": 7586, + "teacher_loss": 0.2560487985610962 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.5014143586158752, + "learning_rate": 2.992287669608922e-05, + "loss": 0.2608, + "step": 7587, + "teacher_loss": 0.2340397834777832 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.38248589634895325, + "learning_rate": 2.9922646503687603e-05, + "loss": 0.254, + "step": 7588, + "teacher_loss": 0.2397211492061615 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.5165925621986389, + "learning_rate": 2.9922415969154445e-05, + "loss": 0.4666, + "step": 7589, + "teacher_loss": 0.46099644899368286 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.4691811800003052, + "learning_rate": 2.992218509249503e-05, + "loss": 0.1961, + "step": 7590, + "teacher_loss": 0.1658032089471817 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.3885548710823059, + "learning_rate": 2.9921953873714652e-05, + "loss": 0.3609, + "step": 7591, + "teacher_loss": 0.35786527395248413 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.2618866562843323, + "learning_rate": 2.9921722312818604e-05, + "loss": 0.2775, + "step": 7592, + "teacher_loss": 0.2792610228061676 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.32753002643585205, + "learning_rate": 2.9921490409812203e-05, + "loss": 0.4075, + "step": 7593, + "teacher_loss": 0.41635072231292725 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.536220908164978, + "learning_rate": 2.9921258164700765e-05, + "loss": 0.3394, + "step": 7594, + "teacher_loss": 0.3175109624862671 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.6709280014038086, + "learning_rate": 2.9921025577489617e-05, + "loss": 0.7408, + "step": 7595, + "teacher_loss": 0.7485227584838867 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.5450140237808228, + "learning_rate": 2.9920792648184083e-05, + "loss": 0.2741, + "step": 7596, + "teacher_loss": 0.24402843415737152 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.4634959101676941, + "learning_rate": 2.992055937678951e-05, + "loss": 0.2739, + "step": 7597, + "teacher_loss": 0.2528146803379059 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.3967730700969696, + "learning_rate": 2.9920325763311242e-05, + "loss": 0.422, + "step": 7598, + "teacher_loss": 0.42482852935791016 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.696155309677124, + "learning_rate": 2.992009180775464e-05, + "loss": 0.3298, + "step": 7599, + "teacher_loss": 0.2890735864639282 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.21985122561454773, + "learning_rate": 2.9919857510125064e-05, + "loss": 0.1969, + "step": 7600, + "teacher_loss": 0.19437068700790405 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.6172599792480469, + "learning_rate": 2.9919622870427893e-05, + "loss": 0.2602, + "step": 7601, + "teacher_loss": 0.22054541110992432 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.14786367118358612, + "learning_rate": 2.9919387888668494e-05, + "loss": 0.1429, + "step": 7602, + "teacher_loss": 0.14233875274658203 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.33377137780189514, + "learning_rate": 2.9919152564852268e-05, + "loss": 0.2344, + "step": 7603, + "teacher_loss": 0.22334596514701843 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.2726837694644928, + "learning_rate": 2.9918916898984598e-05, + "loss": 0.234, + "step": 7604, + "teacher_loss": 0.22966215014457703 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.20103251934051514, + "learning_rate": 2.9918680891070896e-05, + "loss": 0.1732, + "step": 7605, + "teacher_loss": 0.170084148645401 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.8575376272201538, + "learning_rate": 2.9918444541116568e-05, + "loss": 0.375, + "step": 7606, + "teacher_loss": 0.32138872146606445 + }, + { + "compression_loss": 0.0, + "epoch": 1.37, + "label_loss": 0.26272910833358765, + "learning_rate": 2.9918207849127033e-05, + "loss": 0.2964, + "step": 7607, + "teacher_loss": 0.30010873079299927 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.6971885561943054, + "learning_rate": 2.991797081510772e-05, + "loss": 0.3172, + "step": 7608, + "teacher_loss": 0.274998277425766 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.31353896856307983, + "learning_rate": 2.9917733439064065e-05, + "loss": 0.2456, + "step": 7609, + "teacher_loss": 0.23810040950775146 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.5445033311843872, + "learning_rate": 2.9917495721001505e-05, + "loss": 0.2243, + "step": 7610, + "teacher_loss": 0.18877622485160828 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 1.0506901741027832, + "learning_rate": 2.9917257660925495e-05, + "loss": 0.547, + "step": 7611, + "teacher_loss": 0.49104979634284973 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.19204777479171753, + "learning_rate": 2.9917019258841492e-05, + "loss": 0.2418, + "step": 7612, + "teacher_loss": 0.24727264046669006 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.26949289441108704, + "learning_rate": 2.991678051475496e-05, + "loss": 0.2277, + "step": 7613, + "teacher_loss": 0.22303706407546997 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.5632785558700562, + "learning_rate": 2.9916541428671373e-05, + "loss": 0.2555, + "step": 7614, + "teacher_loss": 0.22132110595703125 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.4861583113670349, + "learning_rate": 2.9916302000596215e-05, + "loss": 0.2483, + "step": 7615, + "teacher_loss": 0.22183868288993835 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.5267183184623718, + "learning_rate": 2.9916062230534968e-05, + "loss": 0.292, + "step": 7616, + "teacher_loss": 0.26588404178619385 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.7098817825317383, + "learning_rate": 2.991582211849314e-05, + "loss": 0.3411, + "step": 7617, + "teacher_loss": 0.30013298988342285 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.451063334941864, + "learning_rate": 2.991558166447623e-05, + "loss": 0.2836, + "step": 7618, + "teacher_loss": 0.26503312587738037 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.7276087999343872, + "learning_rate": 2.991534086848975e-05, + "loss": 0.4087, + "step": 7619, + "teacher_loss": 0.37330079078674316 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.5973576307296753, + "learning_rate": 2.9915099730539223e-05, + "loss": 0.361, + "step": 7620, + "teacher_loss": 0.33478158712387085 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.23257100582122803, + "learning_rate": 2.991485825063018e-05, + "loss": 0.21, + "step": 7621, + "teacher_loss": 0.20745518803596497 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.9342533349990845, + "learning_rate": 2.9914616428768153e-05, + "loss": 0.6189, + "step": 7622, + "teacher_loss": 0.5838311910629272 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.6175295114517212, + "learning_rate": 2.9914374264958684e-05, + "loss": 0.2455, + "step": 7623, + "teacher_loss": 0.20417055487632751 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.2612594664096832, + "learning_rate": 2.991413175920733e-05, + "loss": 0.2373, + "step": 7624, + "teacher_loss": 0.23464033007621765 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.4282259941101074, + "learning_rate": 2.991388891151965e-05, + "loss": 0.2786, + "step": 7625, + "teacher_loss": 0.2619744837284088 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.3231649398803711, + "learning_rate": 2.991364572190121e-05, + "loss": 0.2784, + "step": 7626, + "teacher_loss": 0.27348142862319946 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.5406469106674194, + "learning_rate": 2.9913402190357596e-05, + "loss": 0.2933, + "step": 7627, + "teacher_loss": 0.2658522427082062 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.4537014067173004, + "learning_rate": 2.9913158316894374e-05, + "loss": 0.2408, + "step": 7628, + "teacher_loss": 0.21709023416042328 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.4893779754638672, + "learning_rate": 2.9912914101517144e-05, + "loss": 0.2604, + "step": 7629, + "teacher_loss": 0.23499667644500732 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.39155447483062744, + "learning_rate": 2.9912669544231507e-05, + "loss": 0.3032, + "step": 7630, + "teacher_loss": 0.2934088408946991 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.5394455194473267, + "learning_rate": 2.9912424645043064e-05, + "loss": 0.3402, + "step": 7631, + "teacher_loss": 0.3180971145629883 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.9320620894432068, + "learning_rate": 2.991217940395744e-05, + "loss": 0.3225, + "step": 7632, + "teacher_loss": 0.25479528307914734 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.4853166341781616, + "learning_rate": 2.991193382098025e-05, + "loss": 0.2637, + "step": 7633, + "teacher_loss": 0.239122211933136 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.3353040814399719, + "learning_rate": 2.9911687896117126e-05, + "loss": 0.3197, + "step": 7634, + "teacher_loss": 0.31792449951171875 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.5667581558227539, + "learning_rate": 2.99114416293737e-05, + "loss": 0.4046, + "step": 7635, + "teacher_loss": 0.38663744926452637 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.18207284808158875, + "learning_rate": 2.991119502075563e-05, + "loss": 0.2295, + "step": 7636, + "teacher_loss": 0.23474003374576569 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.4621998071670532, + "learning_rate": 2.9910948070268562e-05, + "loss": 0.3162, + "step": 7637, + "teacher_loss": 0.300016313791275 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.38535675406455994, + "learning_rate": 2.991070077791816e-05, + "loss": 0.2576, + "step": 7638, + "teacher_loss": 0.2433701455593109 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.49239856004714966, + "learning_rate": 2.9910453143710096e-05, + "loss": 0.2226, + "step": 7639, + "teacher_loss": 0.19262206554412842 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.27584561705589294, + "learning_rate": 2.9910205167650044e-05, + "loss": 0.1819, + "step": 7640, + "teacher_loss": 0.17148154973983765 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.41592133045196533, + "learning_rate": 2.990995684974369e-05, + "loss": 0.2771, + "step": 7641, + "teacher_loss": 0.2617039084434509 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.6000227332115173, + "learning_rate": 2.9909708189996728e-05, + "loss": 0.2176, + "step": 7642, + "teacher_loss": 0.17507179081439972 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.7403451800346375, + "learning_rate": 2.990945918841486e-05, + "loss": 0.3587, + "step": 7643, + "teacher_loss": 0.3163034915924072 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.3336305618286133, + "learning_rate": 2.990920984500379e-05, + "loss": 0.1946, + "step": 7644, + "teacher_loss": 0.1792076826095581 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 1.288038969039917, + "learning_rate": 2.9908960159769243e-05, + "loss": 0.7831, + "step": 7645, + "teacher_loss": 0.727039635181427 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.8299707770347595, + "learning_rate": 2.990871013271694e-05, + "loss": 0.3659, + "step": 7646, + "teacher_loss": 0.3143097162246704 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.4849017262458801, + "learning_rate": 2.9908459763852605e-05, + "loss": 0.8342, + "step": 7647, + "teacher_loss": 0.8730412721633911 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.3111902177333832, + "learning_rate": 2.990820905318199e-05, + "loss": 0.2294, + "step": 7648, + "teacher_loss": 0.220281183719635 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.24912330508232117, + "learning_rate": 2.9907958000710838e-05, + "loss": 0.2592, + "step": 7649, + "teacher_loss": 0.26032960414886475 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.20828185975551605, + "learning_rate": 2.9907706606444905e-05, + "loss": 0.2203, + "step": 7650, + "teacher_loss": 0.22168032824993134 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.5150014162063599, + "learning_rate": 2.9907454870389957e-05, + "loss": 0.2407, + "step": 7651, + "teacher_loss": 0.21016745269298553 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.1890479028224945, + "learning_rate": 2.9907202792551764e-05, + "loss": 0.2251, + "step": 7652, + "teacher_loss": 0.229139506816864 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.35701701045036316, + "learning_rate": 2.99069503729361e-05, + "loss": 0.2391, + "step": 7653, + "teacher_loss": 0.22603777050971985 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.4295775890350342, + "learning_rate": 2.9906697611548767e-05, + "loss": 0.248, + "step": 7654, + "teacher_loss": 0.22784534096717834 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.24752330780029297, + "learning_rate": 2.9906444508395544e-05, + "loss": 0.2023, + "step": 7655, + "teacher_loss": 0.19729755818843842 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.3397597074508667, + "learning_rate": 2.990619106348224e-05, + "loss": 0.222, + "step": 7656, + "teacher_loss": 0.20892798900604248 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.32617202401161194, + "learning_rate": 2.9905937276814666e-05, + "loss": 0.3033, + "step": 7657, + "teacher_loss": 0.3007524609565735 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.4927031099796295, + "learning_rate": 2.9905683148398642e-05, + "loss": 0.2285, + "step": 7658, + "teacher_loss": 0.19910714030265808 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.3975003659725189, + "learning_rate": 2.990542867823999e-05, + "loss": 0.2165, + "step": 7659, + "teacher_loss": 0.1964210420846939 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.5493260025978088, + "learning_rate": 2.990517386634455e-05, + "loss": 0.2391, + "step": 7660, + "teacher_loss": 0.20462894439697266 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.3311784565448761, + "learning_rate": 2.990491871271816e-05, + "loss": 0.2201, + "step": 7661, + "teacher_loss": 0.20771250128746033 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.9117849469184875, + "learning_rate": 2.990466321736667e-05, + "loss": 0.5034, + "step": 7662, + "teacher_loss": 0.4580279588699341 + }, + { + "compression_loss": 0.0, + "epoch": 1.38, + "label_loss": 0.9144551157951355, + "learning_rate": 2.990440738029594e-05, + "loss": 0.657, + "step": 7663, + "teacher_loss": 0.628434956073761 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.11332852393388748, + "learning_rate": 2.9904151201511835e-05, + "loss": 0.1796, + "step": 7664, + "teacher_loss": 0.18698114156723022 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.8387352228164673, + "learning_rate": 2.990389468102023e-05, + "loss": 0.4767, + "step": 7665, + "teacher_loss": 0.4364364743232727 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.3453589081764221, + "learning_rate": 2.9903637818827e-05, + "loss": 0.1956, + "step": 7666, + "teacher_loss": 0.17895755171775818 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.4446118175983429, + "learning_rate": 2.9903380614938047e-05, + "loss": 0.3536, + "step": 7667, + "teacher_loss": 0.3434881865978241 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.6534728407859802, + "learning_rate": 2.9903123069359247e-05, + "loss": 0.2781, + "step": 7668, + "teacher_loss": 0.2363986372947693 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.8965345621109009, + "learning_rate": 2.9902865182096524e-05, + "loss": 0.5777, + "step": 7669, + "teacher_loss": 0.5422395467758179 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.31460219621658325, + "learning_rate": 2.990260695315578e-05, + "loss": 0.251, + "step": 7670, + "teacher_loss": 0.2439253032207489 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 1.4292893409729004, + "learning_rate": 2.9902348382542943e-05, + "loss": 0.4601, + "step": 7671, + "teacher_loss": 0.3523657023906708 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.812456488609314, + "learning_rate": 2.9902089470263937e-05, + "loss": 0.2741, + "step": 7672, + "teacher_loss": 0.21432796120643616 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.42403146624565125, + "learning_rate": 2.9901830216324694e-05, + "loss": 0.2066, + "step": 7673, + "teacher_loss": 0.1824251413345337 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.35832852125167847, + "learning_rate": 2.9901570620731165e-05, + "loss": 0.326, + "step": 7674, + "teacher_loss": 0.3224143087863922 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.39909595251083374, + "learning_rate": 2.99013106834893e-05, + "loss": 0.3171, + "step": 7675, + "teacher_loss": 0.3080254793167114 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.5603435039520264, + "learning_rate": 2.9901050404605054e-05, + "loss": 0.2804, + "step": 7676, + "teacher_loss": 0.24925857782363892 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.7439641952514648, + "learning_rate": 2.9900789784084396e-05, + "loss": 0.6849, + "step": 7677, + "teacher_loss": 0.6783853769302368 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.7180014252662659, + "learning_rate": 2.9900528821933307e-05, + "loss": 0.457, + "step": 7678, + "teacher_loss": 0.4279584288597107 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.5929282903671265, + "learning_rate": 2.990026751815777e-05, + "loss": 0.2533, + "step": 7679, + "teacher_loss": 0.21560589969158173 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.4413987994194031, + "learning_rate": 2.9900005872763767e-05, + "loss": 0.2843, + "step": 7680, + "teacher_loss": 0.2668389081954956 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.27870482206344604, + "learning_rate": 2.9899743885757303e-05, + "loss": 0.1888, + "step": 7681, + "teacher_loss": 0.1788131296634674 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.2997399568557739, + "learning_rate": 2.9899481557144387e-05, + "loss": 0.2916, + "step": 7682, + "teacher_loss": 0.29066556692123413 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.7859231233596802, + "learning_rate": 2.9899218886931024e-05, + "loss": 0.4196, + "step": 7683, + "teacher_loss": 0.37890440225601196 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.6184636354446411, + "learning_rate": 2.9898955875123243e-05, + "loss": 0.2992, + "step": 7684, + "teacher_loss": 0.26370954513549805 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.45271170139312744, + "learning_rate": 2.9898692521727074e-05, + "loss": 0.2663, + "step": 7685, + "teacher_loss": 0.24558836221694946 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 1.5589104890823364, + "learning_rate": 2.9898428826748556e-05, + "loss": 0.5338, + "step": 7686, + "teacher_loss": 0.4199133813381195 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.16332146525382996, + "learning_rate": 2.989816479019373e-05, + "loss": 0.2172, + "step": 7687, + "teacher_loss": 0.22313950955867767 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.6288355588912964, + "learning_rate": 2.9897900412068658e-05, + "loss": 0.3145, + "step": 7688, + "teacher_loss": 0.27959388494491577 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.10287728905677795, + "learning_rate": 2.9897635692379387e-05, + "loss": 0.1673, + "step": 7689, + "teacher_loss": 0.17445877194404602 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.45436355471611023, + "learning_rate": 2.9897370631132002e-05, + "loss": 0.5602, + "step": 7690, + "teacher_loss": 0.5719693899154663 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.20927652716636658, + "learning_rate": 2.989710522833257e-05, + "loss": 0.2672, + "step": 7691, + "teacher_loss": 0.27360397577285767 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.2832906246185303, + "learning_rate": 2.9896839483987177e-05, + "loss": 0.1737, + "step": 7692, + "teacher_loss": 0.1615416705608368 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.5547175407409668, + "learning_rate": 2.9896573398101925e-05, + "loss": 0.3049, + "step": 7693, + "teacher_loss": 0.2771707773208618 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.5389564037322998, + "learning_rate": 2.98963069706829e-05, + "loss": 0.3383, + "step": 7694, + "teacher_loss": 0.3159579038619995 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.42774468660354614, + "learning_rate": 2.989604020173622e-05, + "loss": 0.2761, + "step": 7695, + "teacher_loss": 0.2592783570289612 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.3508699834346771, + "learning_rate": 2.9895773091268002e-05, + "loss": 0.2075, + "step": 7696, + "teacher_loss": 0.1915394365787506 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.5646740198135376, + "learning_rate": 2.989550563928436e-05, + "loss": 0.3057, + "step": 7697, + "teacher_loss": 0.2768925428390503 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.16079141199588776, + "learning_rate": 2.9895237845791437e-05, + "loss": 0.2271, + "step": 7698, + "teacher_loss": 0.2344880998134613 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.20284727215766907, + "learning_rate": 2.9894969710795368e-05, + "loss": 0.1987, + "step": 7699, + "teacher_loss": 0.19820529222488403 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.40466055274009705, + "learning_rate": 2.9894701234302303e-05, + "loss": 0.3401, + "step": 7700, + "teacher_loss": 0.3329346776008606 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.38243868947029114, + "learning_rate": 2.989443241631839e-05, + "loss": 0.2912, + "step": 7701, + "teacher_loss": 0.2810543179512024 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.6176409721374512, + "learning_rate": 2.9894163256849803e-05, + "loss": 0.3587, + "step": 7702, + "teacher_loss": 0.3298988938331604 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.3861449360847473, + "learning_rate": 2.9893893755902705e-05, + "loss": 0.1971, + "step": 7703, + "teacher_loss": 0.1761438548564911 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.5779880285263062, + "learning_rate": 2.9893623913483276e-05, + "loss": 0.2482, + "step": 7704, + "teacher_loss": 0.21156755089759827 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.7626945972442627, + "learning_rate": 2.9893353729597706e-05, + "loss": 0.2963, + "step": 7705, + "teacher_loss": 0.2445230334997177 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.38619405031204224, + "learning_rate": 2.9893083204252187e-05, + "loss": 0.4251, + "step": 7706, + "teacher_loss": 0.42941296100616455 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.7126033306121826, + "learning_rate": 2.9892812337452924e-05, + "loss": 0.3314, + "step": 7707, + "teacher_loss": 0.289096862077713 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.3210299015045166, + "learning_rate": 2.9892541129206122e-05, + "loss": 0.2735, + "step": 7708, + "teacher_loss": 0.2682119309902191 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.5015177130699158, + "learning_rate": 2.9892269579518005e-05, + "loss": 0.2967, + "step": 7709, + "teacher_loss": 0.2739132046699524 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.6347278356552124, + "learning_rate": 2.9891997688394792e-05, + "loss": 0.2161, + "step": 7710, + "teacher_loss": 0.16954763233661652 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.40542662143707275, + "learning_rate": 2.9891725455842726e-05, + "loss": 0.317, + "step": 7711, + "teacher_loss": 0.30716565251350403 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.08896270394325256, + "learning_rate": 2.989145288186804e-05, + "loss": 0.1219, + "step": 7712, + "teacher_loss": 0.125535249710083 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 1.1812069416046143, + "learning_rate": 2.989117996647699e-05, + "loss": 0.3151, + "step": 7713, + "teacher_loss": 0.21887724101543427 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.6275110840797424, + "learning_rate": 2.989090670967582e-05, + "loss": 0.2753, + "step": 7714, + "teacher_loss": 0.23614096641540527 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.5886155366897583, + "learning_rate": 2.9890633111470808e-05, + "loss": 0.2371, + "step": 7715, + "teacher_loss": 0.19809550046920776 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.8504222631454468, + "learning_rate": 2.9890359171868225e-05, + "loss": 0.682, + "step": 7716, + "teacher_loss": 0.6633404493331909 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.2692478895187378, + "learning_rate": 2.9890084890874353e-05, + "loss": 0.2671, + "step": 7717, + "teacher_loss": 0.26688599586486816 + }, + { + "compression_loss": 0.0, + "epoch": 1.39, + "label_loss": 0.28245532512664795, + "learning_rate": 2.9889810268495472e-05, + "loss": 0.2907, + "step": 7718, + "teacher_loss": 0.2916331887245178 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.5584731698036194, + "learning_rate": 2.988953530473789e-05, + "loss": 0.2336, + "step": 7719, + "teacher_loss": 0.19752509891986847 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.25852152705192566, + "learning_rate": 2.9889259999607897e-05, + "loss": 0.2292, + "step": 7720, + "teacher_loss": 0.22590261697769165 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.25501549243927, + "learning_rate": 2.9888984353111814e-05, + "loss": 0.2072, + "step": 7721, + "teacher_loss": 0.2019159495830536 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 1.220973253250122, + "learning_rate": 2.9888708365255964e-05, + "loss": 0.3519, + "step": 7722, + "teacher_loss": 0.2553603947162628 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.25047096610069275, + "learning_rate": 2.9888432036046664e-05, + "loss": 0.2812, + "step": 7723, + "teacher_loss": 0.2846137583255768 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.676315426826477, + "learning_rate": 2.988815536549026e-05, + "loss": 0.2486, + "step": 7724, + "teacher_loss": 0.2010425627231598 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.5959880352020264, + "learning_rate": 2.9887878353593093e-05, + "loss": 0.4573, + "step": 7725, + "teacher_loss": 0.44186708331108093 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.6098060607910156, + "learning_rate": 2.9887601000361505e-05, + "loss": 0.2973, + "step": 7726, + "teacher_loss": 0.26260310411453247 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.21846815943717957, + "learning_rate": 2.9887323305801863e-05, + "loss": 0.234, + "step": 7727, + "teacher_loss": 0.2356969267129898 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.7597025632858276, + "learning_rate": 2.9887045269920533e-05, + "loss": 0.262, + "step": 7728, + "teacher_loss": 0.20670972764492035 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.3945283889770508, + "learning_rate": 2.9886766892723887e-05, + "loss": 0.4478, + "step": 7729, + "teacher_loss": 0.45376574993133545 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.9825731515884399, + "learning_rate": 2.988648817421831e-05, + "loss": 0.4069, + "step": 7730, + "teacher_loss": 0.34298083186149597 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.36861345171928406, + "learning_rate": 2.9886209114410194e-05, + "loss": 0.2665, + "step": 7731, + "teacher_loss": 0.25520598888397217 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.2924313545227051, + "learning_rate": 2.9885929713305927e-05, + "loss": 0.3089, + "step": 7732, + "teacher_loss": 0.31077510118484497 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.5260310769081116, + "learning_rate": 2.9885649970911934e-05, + "loss": 0.3034, + "step": 7733, + "teacher_loss": 0.2786204218864441 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.25850313901901245, + "learning_rate": 2.9885369887234603e-05, + "loss": 0.1906, + "step": 7734, + "teacher_loss": 0.18305891752243042 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.6007351875305176, + "learning_rate": 2.9885089462280377e-05, + "loss": 0.2785, + "step": 7735, + "teacher_loss": 0.24267081916332245 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.6686475872993469, + "learning_rate": 2.9884808696055675e-05, + "loss": 0.256, + "step": 7736, + "teacher_loss": 0.2101626694202423 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.21142703294754028, + "learning_rate": 2.988452758856694e-05, + "loss": 0.2518, + "step": 7737, + "teacher_loss": 0.25626808404922485 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.5742628574371338, + "learning_rate": 2.9884246139820613e-05, + "loss": 0.4333, + "step": 7738, + "teacher_loss": 0.41760575771331787 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.8935059905052185, + "learning_rate": 2.9883964349823142e-05, + "loss": 0.3832, + "step": 7739, + "teacher_loss": 0.326495498418808 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.16041089594364166, + "learning_rate": 2.9883682218580993e-05, + "loss": 0.1812, + "step": 7740, + "teacher_loss": 0.18348428606987 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.5403831005096436, + "learning_rate": 2.988339974610064e-05, + "loss": 0.3149, + "step": 7741, + "teacher_loss": 0.2898145914077759 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.1856047809123993, + "learning_rate": 2.988311693238855e-05, + "loss": 0.2028, + "step": 7742, + "teacher_loss": 0.20466753840446472 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.7981098890304565, + "learning_rate": 2.9882833777451215e-05, + "loss": 0.5572, + "step": 7743, + "teacher_loss": 0.5304381847381592 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.18892623484134674, + "learning_rate": 2.988255028129512e-05, + "loss": 0.2584, + "step": 7744, + "teacher_loss": 0.2661294639110565 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.3756716549396515, + "learning_rate": 2.9882266443926766e-05, + "loss": 0.2908, + "step": 7745, + "teacher_loss": 0.28136372566223145 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.20233780145645142, + "learning_rate": 2.9881982265352665e-05, + "loss": 0.2132, + "step": 7746, + "teacher_loss": 0.21444067358970642 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.3564853072166443, + "learning_rate": 2.9881697745579323e-05, + "loss": 0.2937, + "step": 7747, + "teacher_loss": 0.2867465019226074 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.33353284001350403, + "learning_rate": 2.9881412884613273e-05, + "loss": 0.2546, + "step": 7748, + "teacher_loss": 0.24582603573799133 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.3640999495983124, + "learning_rate": 2.988112768246104e-05, + "loss": 0.3128, + "step": 7749, + "teacher_loss": 0.30711257457733154 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.3294028341770172, + "learning_rate": 2.9880842139129168e-05, + "loss": 0.2603, + "step": 7750, + "teacher_loss": 0.25257858633995056 + }, + { + "epoch": 1.4, + "eval_exact_match": 79.18637653736991, + "eval_f1": 86.78283582499546, + "step": 7750 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.5814988017082214, + "learning_rate": 2.9880556254624202e-05, + "loss": 0.3444, + "step": 7751, + "teacher_loss": 0.31810107827186584 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.5516996383666992, + "learning_rate": 2.988027002895269e-05, + "loss": 0.2156, + "step": 7752, + "teacher_loss": 0.17826640605926514 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.6024022102355957, + "learning_rate": 2.987998346212121e-05, + "loss": 0.3827, + "step": 7753, + "teacher_loss": 0.35823768377304077 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.47813841700553894, + "learning_rate": 2.987969655413631e-05, + "loss": 0.2159, + "step": 7754, + "teacher_loss": 0.18676459789276123 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.4733859598636627, + "learning_rate": 2.9879409305004582e-05, + "loss": 0.233, + "step": 7755, + "teacher_loss": 0.20633962750434875 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.40482449531555176, + "learning_rate": 2.9879121714732612e-05, + "loss": 0.2468, + "step": 7756, + "teacher_loss": 0.2292899489402771 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.17602097988128662, + "learning_rate": 2.9878833783326996e-05, + "loss": 0.1859, + "step": 7757, + "teacher_loss": 0.1870066225528717 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.41530680656433105, + "learning_rate": 2.9878545510794323e-05, + "loss": 0.3099, + "step": 7758, + "teacher_loss": 0.2982083261013031 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.4381055533885956, + "learning_rate": 2.9878256897141215e-05, + "loss": 0.2867, + "step": 7759, + "teacher_loss": 0.269903302192688 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.34129971265792847, + "learning_rate": 2.987796794237428e-05, + "loss": 0.3507, + "step": 7760, + "teacher_loss": 0.35173535346984863 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.4512649178504944, + "learning_rate": 2.9877678646500143e-05, + "loss": 0.3374, + "step": 7761, + "teacher_loss": 0.32472532987594604 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.7364459037780762, + "learning_rate": 2.9877389009525447e-05, + "loss": 0.37, + "step": 7762, + "teacher_loss": 0.32924115657806396 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.80064857006073, + "learning_rate": 2.987709903145682e-05, + "loss": 0.3103, + "step": 7763, + "teacher_loss": 0.2558046579360962 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.3515069782733917, + "learning_rate": 2.987680871230092e-05, + "loss": 0.1708, + "step": 7764, + "teacher_loss": 0.15071332454681396 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.5223360061645508, + "learning_rate": 2.98765180520644e-05, + "loss": 0.2561, + "step": 7765, + "teacher_loss": 0.22649329900741577 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.6189290881156921, + "learning_rate": 2.987622705075392e-05, + "loss": 0.2922, + "step": 7766, + "teacher_loss": 0.2558940052986145 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.6466519832611084, + "learning_rate": 2.9875935708376156e-05, + "loss": 0.2874, + "step": 7767, + "teacher_loss": 0.24752295017242432 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.493502676486969, + "learning_rate": 2.9875644024937788e-05, + "loss": 0.4489, + "step": 7768, + "teacher_loss": 0.4439082741737366 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.43044495582580566, + "learning_rate": 2.9875352000445503e-05, + "loss": 0.2629, + "step": 7769, + "teacher_loss": 0.24429334700107574 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.2896115183830261, + "learning_rate": 2.9875059634905996e-05, + "loss": 0.3539, + "step": 7770, + "teacher_loss": 0.3609997034072876 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.26232093572616577, + "learning_rate": 2.987476692832596e-05, + "loss": 0.1882, + "step": 7771, + "teacher_loss": 0.1799980252981186 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.923363447189331, + "learning_rate": 2.9874473880712125e-05, + "loss": 0.4743, + "step": 7772, + "teacher_loss": 0.42437034845352173 + }, + { + "compression_loss": 0.0, + "epoch": 1.4, + "label_loss": 0.5126427412033081, + "learning_rate": 2.98741804920712e-05, + "loss": 0.3137, + "step": 7773, + "teacher_loss": 0.2916460633277893 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.904716432094574, + "learning_rate": 2.9873886762409904e-05, + "loss": 0.334, + "step": 7774, + "teacher_loss": 0.27061885595321655 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.5211755633354187, + "learning_rate": 2.9873592691734985e-05, + "loss": 0.277, + "step": 7775, + "teacher_loss": 0.24991083145141602 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.32271963357925415, + "learning_rate": 2.9873298280053174e-05, + "loss": 0.261, + "step": 7776, + "teacher_loss": 0.2540992796421051 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.4167831540107727, + "learning_rate": 2.987300352737123e-05, + "loss": 0.2455, + "step": 7777, + "teacher_loss": 0.22651216387748718 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.2187052071094513, + "learning_rate": 2.9872708433695907e-05, + "loss": 0.1978, + "step": 7778, + "teacher_loss": 0.19549311697483063 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.23581178486347198, + "learning_rate": 2.9872412999033967e-05, + "loss": 0.3212, + "step": 7779, + "teacher_loss": 0.33072429895401 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.2800215482711792, + "learning_rate": 2.9872117223392188e-05, + "loss": 0.2174, + "step": 7780, + "teacher_loss": 0.21039703488349915 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.7383527755737305, + "learning_rate": 2.9871821106777354e-05, + "loss": 0.3076, + "step": 7781, + "teacher_loss": 0.2597929537296295 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.3133199214935303, + "learning_rate": 2.987152464919624e-05, + "loss": 0.171, + "step": 7782, + "teacher_loss": 0.15519243478775024 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.7065719366073608, + "learning_rate": 2.9871227850655663e-05, + "loss": 0.3373, + "step": 7783, + "teacher_loss": 0.2962336838245392 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.3674553632736206, + "learning_rate": 2.9870930711162413e-05, + "loss": 0.3061, + "step": 7784, + "teacher_loss": 0.2993176579475403 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.5854167938232422, + "learning_rate": 2.9870633230723313e-05, + "loss": 0.2903, + "step": 7785, + "teacher_loss": 0.25753286480903625 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.6971392035484314, + "learning_rate": 2.987033540934517e-05, + "loss": 0.8198, + "step": 7786, + "teacher_loss": 0.8333829641342163 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.4101884365081787, + "learning_rate": 2.9870037247034823e-05, + "loss": 0.2339, + "step": 7787, + "teacher_loss": 0.2142728716135025 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.2499900460243225, + "learning_rate": 2.9869738743799103e-05, + "loss": 0.3565, + "step": 7788, + "teacher_loss": 0.3682914972305298 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.38142943382263184, + "learning_rate": 2.9869439899644856e-05, + "loss": 0.3019, + "step": 7789, + "teacher_loss": 0.29301655292510986 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.290536493062973, + "learning_rate": 2.9869140714578934e-05, + "loss": 0.2271, + "step": 7790, + "teacher_loss": 0.2200368046760559 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.8257572054862976, + "learning_rate": 2.9868841188608196e-05, + "loss": 0.645, + "step": 7791, + "teacher_loss": 0.6249566078186035 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.6436138153076172, + "learning_rate": 2.9868541321739508e-05, + "loss": 0.4424, + "step": 7792, + "teacher_loss": 0.42009642720222473 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.8447271585464478, + "learning_rate": 2.9868241113979744e-05, + "loss": 0.3683, + "step": 7793, + "teacher_loss": 0.3154064416885376 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.41758328676223755, + "learning_rate": 2.9867940565335788e-05, + "loss": 0.2529, + "step": 7794, + "teacher_loss": 0.23465704917907715 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.5974828004837036, + "learning_rate": 2.9867639675814532e-05, + "loss": 0.4327, + "step": 7795, + "teacher_loss": 0.41433846950531006 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.41767778992652893, + "learning_rate": 2.9867338445422875e-05, + "loss": 0.3602, + "step": 7796, + "teacher_loss": 0.35381531715393066 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.3344104290008545, + "learning_rate": 2.986703687416772e-05, + "loss": 0.2141, + "step": 7797, + "teacher_loss": 0.2007756531238556 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.3764779567718506, + "learning_rate": 2.986673496205599e-05, + "loss": 0.2701, + "step": 7798, + "teacher_loss": 0.25832122564315796 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.9717959761619568, + "learning_rate": 2.986643270909459e-05, + "loss": 0.3898, + "step": 7799, + "teacher_loss": 0.32515549659729004 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.45768409967422485, + "learning_rate": 2.9866130115290468e-05, + "loss": 0.2276, + "step": 7800, + "teacher_loss": 0.2020527422428131 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.5572268962860107, + "learning_rate": 2.986582718065055e-05, + "loss": 0.2723, + "step": 7801, + "teacher_loss": 0.24064376950263977 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.6474494934082031, + "learning_rate": 2.9865523905181786e-05, + "loss": 0.3292, + "step": 7802, + "teacher_loss": 0.2938896417617798 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.6471384763717651, + "learning_rate": 2.9865220288891125e-05, + "loss": 0.2702, + "step": 7803, + "teacher_loss": 0.22832253575325012 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.7434492111206055, + "learning_rate": 2.9864916331785533e-05, + "loss": 0.4546, + "step": 7804, + "teacher_loss": 0.4225079417228699 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.6822161674499512, + "learning_rate": 2.986461203387198e-05, + "loss": 0.4149, + "step": 7805, + "teacher_loss": 0.385237455368042 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.4707350432872772, + "learning_rate": 2.9864307395157435e-05, + "loss": 0.268, + "step": 7806, + "teacher_loss": 0.2454976737499237 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.3236408829689026, + "learning_rate": 2.986400241564889e-05, + "loss": 0.2494, + "step": 7807, + "teacher_loss": 0.2411423772573471 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.2281387746334076, + "learning_rate": 2.986369709535333e-05, + "loss": 0.1985, + "step": 7808, + "teacher_loss": 0.19520670175552368 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.390608012676239, + "learning_rate": 2.986339143427776e-05, + "loss": 0.2949, + "step": 7809, + "teacher_loss": 0.2843020558357239 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.2657642364501953, + "learning_rate": 2.9863085432429193e-05, + "loss": 0.2524, + "step": 7810, + "teacher_loss": 0.25096291303634644 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.37380653619766235, + "learning_rate": 2.986277908981463e-05, + "loss": 0.2607, + "step": 7811, + "teacher_loss": 0.24807780981063843 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.6848816275596619, + "learning_rate": 2.986247240644111e-05, + "loss": 0.3601, + "step": 7812, + "teacher_loss": 0.3239631652832031 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.3223942816257477, + "learning_rate": 2.9862165382315657e-05, + "loss": 0.2079, + "step": 7813, + "teacher_loss": 0.19520601630210876 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.636390745639801, + "learning_rate": 2.986185801744531e-05, + "loss": 0.292, + "step": 7814, + "teacher_loss": 0.2536846995353699 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.6240343451499939, + "learning_rate": 2.9861550311837116e-05, + "loss": 0.2582, + "step": 7815, + "teacher_loss": 0.21750250458717346 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.7771161794662476, + "learning_rate": 2.9861242265498133e-05, + "loss": 0.3767, + "step": 7816, + "teacher_loss": 0.33225274085998535 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.5588755011558533, + "learning_rate": 2.986093387843542e-05, + "loss": 0.3765, + "step": 7817, + "teacher_loss": 0.3562135696411133 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.3828737139701843, + "learning_rate": 2.9860625150656043e-05, + "loss": 0.2498, + "step": 7818, + "teacher_loss": 0.2350327968597412 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.5769387483596802, + "learning_rate": 2.9860316082167095e-05, + "loss": 0.2832, + "step": 7819, + "teacher_loss": 0.25057876110076904 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.2763219475746155, + "learning_rate": 2.9860006672975647e-05, + "loss": 0.283, + "step": 7820, + "teacher_loss": 0.28378212451934814 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.24730199575424194, + "learning_rate": 2.9859696923088802e-05, + "loss": 0.2396, + "step": 7821, + "teacher_loss": 0.23874951899051666 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.29271623492240906, + "learning_rate": 2.9859386832513656e-05, + "loss": 0.1817, + "step": 7822, + "teacher_loss": 0.16936182975769043 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.5235801935195923, + "learning_rate": 2.985907640125732e-05, + "loss": 0.3151, + "step": 7823, + "teacher_loss": 0.29196396470069885 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.5830715298652649, + "learning_rate": 2.9858765629326913e-05, + "loss": 0.2402, + "step": 7824, + "teacher_loss": 0.20212921500205994 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.35706770420074463, + "learning_rate": 2.985845451672956e-05, + "loss": 0.3127, + "step": 7825, + "teacher_loss": 0.3078124225139618 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.28731000423431396, + "learning_rate": 2.9858143063472392e-05, + "loss": 0.2032, + "step": 7826, + "teacher_loss": 0.19384275376796722 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.8478162288665771, + "learning_rate": 2.985783126956255e-05, + "loss": 0.2882, + "step": 7827, + "teacher_loss": 0.22604668140411377 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.534031867980957, + "learning_rate": 2.9857519135007184e-05, + "loss": 0.5821, + "step": 7828, + "teacher_loss": 0.5874625444412231 + }, + { + "compression_loss": 0.0, + "epoch": 1.41, + "label_loss": 0.35257723927497864, + "learning_rate": 2.9857206659813447e-05, + "loss": 0.2564, + "step": 7829, + "teacher_loss": 0.24576786160469055 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.30685386061668396, + "learning_rate": 2.9856893843988507e-05, + "loss": 0.2961, + "step": 7830, + "teacher_loss": 0.29489296674728394 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.44119954109191895, + "learning_rate": 2.9856580687539537e-05, + "loss": 0.3053, + "step": 7831, + "teacher_loss": 0.2902168333530426 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.6479071974754333, + "learning_rate": 2.985626719047371e-05, + "loss": 0.3109, + "step": 7832, + "teacher_loss": 0.27346375584602356 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.47327619791030884, + "learning_rate": 2.9855953352798223e-05, + "loss": 0.3112, + "step": 7833, + "teacher_loss": 0.2931399345397949 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.8096597194671631, + "learning_rate": 2.9855639174520262e-05, + "loss": 0.3847, + "step": 7834, + "teacher_loss": 0.33747875690460205 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.29229435324668884, + "learning_rate": 2.9855324655647036e-05, + "loss": 0.2022, + "step": 7835, + "teacher_loss": 0.19222742319107056 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.40433692932128906, + "learning_rate": 2.9855009796185752e-05, + "loss": 0.281, + "step": 7836, + "teacher_loss": 0.2672778367996216 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.6754491925239563, + "learning_rate": 2.9854694596143632e-05, + "loss": 0.4032, + "step": 7837, + "teacher_loss": 0.37299975752830505 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.17516176402568817, + "learning_rate": 2.98543790555279e-05, + "loss": 0.219, + "step": 7838, + "teacher_loss": 0.22389857470989227 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.570223331451416, + "learning_rate": 2.98540631743458e-05, + "loss": 0.2323, + "step": 7839, + "teacher_loss": 0.19474822282791138 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.4763471484184265, + "learning_rate": 2.9853746952604556e-05, + "loss": 0.2416, + "step": 7840, + "teacher_loss": 0.21551844477653503 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.45642226934432983, + "learning_rate": 2.9853430390311434e-05, + "loss": 0.2849, + "step": 7841, + "teacher_loss": 0.26583221554756165 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.7567011117935181, + "learning_rate": 2.985311348747368e-05, + "loss": 0.2952, + "step": 7842, + "teacher_loss": 0.2439035177230835 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.23512735962867737, + "learning_rate": 2.9852796244098574e-05, + "loss": 0.211, + "step": 7843, + "teacher_loss": 0.20826426148414612 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.5450301170349121, + "learning_rate": 2.9852478660193375e-05, + "loss": 0.239, + "step": 7844, + "teacher_loss": 0.20494484901428223 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.34329545497894287, + "learning_rate": 2.9852160735765374e-05, + "loss": 0.3377, + "step": 7845, + "teacher_loss": 0.33705127239227295 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.9214147925376892, + "learning_rate": 2.9851842470821854e-05, + "loss": 0.4034, + "step": 7846, + "teacher_loss": 0.3458458483219147 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.4630696773529053, + "learning_rate": 2.9851523865370113e-05, + "loss": 0.2512, + "step": 7847, + "teacher_loss": 0.22761225700378418 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.40053293108940125, + "learning_rate": 2.9851204919417463e-05, + "loss": 0.1915, + "step": 7848, + "teacher_loss": 0.16827738285064697 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.9158337116241455, + "learning_rate": 2.9850885632971202e-05, + "loss": 0.3364, + "step": 7849, + "teacher_loss": 0.27197355031967163 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.2564816474914551, + "learning_rate": 2.985056600603867e-05, + "loss": 0.3, + "step": 7850, + "teacher_loss": 0.30478712916374207 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.5917198657989502, + "learning_rate": 2.9850246038627172e-05, + "loss": 0.3044, + "step": 7851, + "teacher_loss": 0.2724647521972656 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.3412782549858093, + "learning_rate": 2.9849925730744064e-05, + "loss": 0.2767, + "step": 7852, + "teacher_loss": 0.2694881856441498 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.21820154786109924, + "learning_rate": 2.9849605082396678e-05, + "loss": 0.2904, + "step": 7853, + "teacher_loss": 0.2984505891799927 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.47932130098342896, + "learning_rate": 2.984928409359237e-05, + "loss": 0.3589, + "step": 7854, + "teacher_loss": 0.3455348610877991 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.4179157614707947, + "learning_rate": 2.9848962764338497e-05, + "loss": 0.3351, + "step": 7855, + "teacher_loss": 0.3259321451187134 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.3651089668273926, + "learning_rate": 2.9848641094642423e-05, + "loss": 0.2096, + "step": 7856, + "teacher_loss": 0.19228777289390564 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.5287426710128784, + "learning_rate": 2.9848319084511535e-05, + "loss": 0.4421, + "step": 7857, + "teacher_loss": 0.4324356019496918 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.2561258375644684, + "learning_rate": 2.9847996733953204e-05, + "loss": 0.1774, + "step": 7858, + "teacher_loss": 0.1686631143093109 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.47791463136672974, + "learning_rate": 2.9847674042974828e-05, + "loss": 0.2425, + "step": 7859, + "teacher_loss": 0.21635472774505615 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.4318338632583618, + "learning_rate": 2.9847351011583796e-05, + "loss": 0.233, + "step": 7860, + "teacher_loss": 0.21094997227191925 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.44735851883888245, + "learning_rate": 2.9847027639787524e-05, + "loss": 0.2076, + "step": 7861, + "teacher_loss": 0.1809990555047989 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.39480364322662354, + "learning_rate": 2.984670392759342e-05, + "loss": 0.2312, + "step": 7862, + "teacher_loss": 0.21304309368133545 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.4058953523635864, + "learning_rate": 2.9846379875008906e-05, + "loss": 0.2499, + "step": 7863, + "teacher_loss": 0.23261000216007233 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.48555833101272583, + "learning_rate": 2.9846055482041418e-05, + "loss": 0.3168, + "step": 7864, + "teacher_loss": 0.2980100214481354 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.4436887502670288, + "learning_rate": 2.9845730748698385e-05, + "loss": 0.2697, + "step": 7865, + "teacher_loss": 0.2504180669784546 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.5427103042602539, + "learning_rate": 2.984540567498726e-05, + "loss": 0.3442, + "step": 7866, + "teacher_loss": 0.322147011756897 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 1.2021923065185547, + "learning_rate": 2.9845080260915484e-05, + "loss": 0.5085, + "step": 7867, + "teacher_loss": 0.4314666986465454 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.5192672610282898, + "learning_rate": 2.9844754506490534e-05, + "loss": 0.3265, + "step": 7868, + "teacher_loss": 0.30503201484680176 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.22259517014026642, + "learning_rate": 2.9844428411719864e-05, + "loss": 0.2632, + "step": 7869, + "teacher_loss": 0.26768654584884644 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.7509715557098389, + "learning_rate": 2.984410197661096e-05, + "loss": 0.2806, + "step": 7870, + "teacher_loss": 0.2283553034067154 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.6299173831939697, + "learning_rate": 2.9843775201171303e-05, + "loss": 0.3198, + "step": 7871, + "teacher_loss": 0.2853153347969055 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.3831256926059723, + "learning_rate": 2.9843448085408383e-05, + "loss": 0.3328, + "step": 7872, + "teacher_loss": 0.3272198438644409 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.8916932344436646, + "learning_rate": 2.9843120629329698e-05, + "loss": 0.3202, + "step": 7873, + "teacher_loss": 0.2566462457180023 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.2449224889278412, + "learning_rate": 2.9842792832942764e-05, + "loss": 0.2767, + "step": 7874, + "teacher_loss": 0.28023597598075867 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.775907576084137, + "learning_rate": 2.984246469625509e-05, + "loss": 0.5577, + "step": 7875, + "teacher_loss": 0.5334750413894653 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.3195655345916748, + "learning_rate": 2.98421362192742e-05, + "loss": 0.2053, + "step": 7876, + "teacher_loss": 0.1925676017999649 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.3903654217720032, + "learning_rate": 2.9841807402007622e-05, + "loss": 0.2715, + "step": 7877, + "teacher_loss": 0.25829219818115234 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.48247915506362915, + "learning_rate": 2.9841478244462906e-05, + "loss": 0.3351, + "step": 7878, + "teacher_loss": 0.31874769926071167 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.779265820980072, + "learning_rate": 2.984114874664759e-05, + "loss": 0.3147, + "step": 7879, + "teacher_loss": 0.2630445063114166 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.6541645526885986, + "learning_rate": 2.9840818908569223e-05, + "loss": 0.2538, + "step": 7880, + "teacher_loss": 0.20933130383491516 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.26947706937789917, + "learning_rate": 2.9840488730235378e-05, + "loss": 0.228, + "step": 7881, + "teacher_loss": 0.22342851758003235 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.32322901487350464, + "learning_rate": 2.9840158211653616e-05, + "loss": 0.1679, + "step": 7882, + "teacher_loss": 0.150656059384346 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.5273176431655884, + "learning_rate": 2.9839827352831522e-05, + "loss": 0.272, + "step": 7883, + "teacher_loss": 0.24364562332630157 + }, + { + "compression_loss": 0.0, + "epoch": 1.42, + "label_loss": 0.3925158381462097, + "learning_rate": 2.983949615377668e-05, + "loss": 0.2073, + "step": 7884, + "teacher_loss": 0.18673306703567505 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.3156322240829468, + "learning_rate": 2.9839164614496686e-05, + "loss": 0.2322, + "step": 7885, + "teacher_loss": 0.222874253988266 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.7113157510757446, + "learning_rate": 2.9838832734999132e-05, + "loss": 0.2776, + "step": 7886, + "teacher_loss": 0.22939878702163696 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.8962811827659607, + "learning_rate": 2.9838500515291632e-05, + "loss": 0.6153, + "step": 7887, + "teacher_loss": 0.584083616733551 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.25298309326171875, + "learning_rate": 2.983816795538181e-05, + "loss": 0.2162, + "step": 7888, + "teacher_loss": 0.21214696764945984 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.2882899343967438, + "learning_rate": 2.9837835055277275e-05, + "loss": 0.2064, + "step": 7889, + "teacher_loss": 0.19726887345314026 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.22818830609321594, + "learning_rate": 2.983750181498567e-05, + "loss": 0.2435, + "step": 7890, + "teacher_loss": 0.24517516791820526 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.43428054451942444, + "learning_rate": 2.983716823451464e-05, + "loss": 0.3154, + "step": 7891, + "teacher_loss": 0.30220216512680054 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 1.1805777549743652, + "learning_rate": 2.983683431387182e-05, + "loss": 0.4844, + "step": 7892, + "teacher_loss": 0.40702199935913086 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.46497368812561035, + "learning_rate": 2.9836500053064874e-05, + "loss": 0.2096, + "step": 7893, + "teacher_loss": 0.18124695122241974 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.337495356798172, + "learning_rate": 2.9836165452101466e-05, + "loss": 0.2125, + "step": 7894, + "teacher_loss": 0.19866225123405457 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.3568882346153259, + "learning_rate": 2.9835830510989267e-05, + "loss": 0.2708, + "step": 7895, + "teacher_loss": 0.26125368475914 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.3126126527786255, + "learning_rate": 2.9835495229735948e-05, + "loss": 0.2078, + "step": 7896, + "teacher_loss": 0.19613653421401978 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.5350156426429749, + "learning_rate": 2.9835159608349207e-05, + "loss": 0.3208, + "step": 7897, + "teacher_loss": 0.29700711369514465 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.4463285505771637, + "learning_rate": 2.9834823646836735e-05, + "loss": 0.3199, + "step": 7898, + "teacher_loss": 0.305867075920105 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.6754963397979736, + "learning_rate": 2.983448734520623e-05, + "loss": 0.2316, + "step": 7899, + "teacher_loss": 0.18229299783706665 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.24778440594673157, + "learning_rate": 2.9834150703465406e-05, + "loss": 0.1789, + "step": 7900, + "teacher_loss": 0.1712363213300705 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.4365927577018738, + "learning_rate": 2.9833813721621985e-05, + "loss": 0.2524, + "step": 7901, + "teacher_loss": 0.23193974792957306 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.4043685793876648, + "learning_rate": 2.9833476399683686e-05, + "loss": 0.3163, + "step": 7902, + "teacher_loss": 0.30646079778671265 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.6264365911483765, + "learning_rate": 2.9833138737658254e-05, + "loss": 0.4169, + "step": 7903, + "teacher_loss": 0.39363518357276917 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.554568886756897, + "learning_rate": 2.9832800735553416e-05, + "loss": 0.2534, + "step": 7904, + "teacher_loss": 0.2199532687664032 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.15444108843803406, + "learning_rate": 2.9832462393376926e-05, + "loss": 0.1933, + "step": 7905, + "teacher_loss": 0.1975642293691635 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.7915629148483276, + "learning_rate": 2.9832123711136548e-05, + "loss": 0.6131, + "step": 7906, + "teacher_loss": 0.5933099985122681 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.13332098722457886, + "learning_rate": 2.9831784688840045e-05, + "loss": 0.2188, + "step": 7907, + "teacher_loss": 0.22826996445655823 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.38115742802619934, + "learning_rate": 2.983144532649518e-05, + "loss": 0.3007, + "step": 7908, + "teacher_loss": 0.29181087017059326 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.8646666407585144, + "learning_rate": 2.9831105624109746e-05, + "loss": 0.3413, + "step": 7909, + "teacher_loss": 0.2831355035305023 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.6045562028884888, + "learning_rate": 2.983076558169152e-05, + "loss": 0.3748, + "step": 7910, + "teacher_loss": 0.34926527738571167 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.5086529850959778, + "learning_rate": 2.983042519924831e-05, + "loss": 0.2572, + "step": 7911, + "teacher_loss": 0.2292218655347824 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.7204261422157288, + "learning_rate": 2.983008447678791e-05, + "loss": 0.2769, + "step": 7912, + "teacher_loss": 0.2275742143392563 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.2638428509235382, + "learning_rate": 2.982974341431814e-05, + "loss": 0.3251, + "step": 7913, + "teacher_loss": 0.33185291290283203 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.22108659148216248, + "learning_rate": 2.9829402011846814e-05, + "loss": 0.2596, + "step": 7914, + "teacher_loss": 0.26391515135765076 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.3795558214187622, + "learning_rate": 2.9829060269381762e-05, + "loss": 0.237, + "step": 7915, + "teacher_loss": 0.221209317445755 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.2602407932281494, + "learning_rate": 2.9828718186930817e-05, + "loss": 0.1896, + "step": 7916, + "teacher_loss": 0.18179252743721008 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.48754292726516724, + "learning_rate": 2.982837576450182e-05, + "loss": 0.237, + "step": 7917, + "teacher_loss": 0.2091621607542038 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.22752819955348969, + "learning_rate": 2.9828033002102624e-05, + "loss": 0.1654, + "step": 7918, + "teacher_loss": 0.15851053595542908 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.5340484380722046, + "learning_rate": 2.9827689899741093e-05, + "loss": 0.3343, + "step": 7919, + "teacher_loss": 0.312160849571228 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.4123237431049347, + "learning_rate": 2.9827346457425087e-05, + "loss": 0.3289, + "step": 7920, + "teacher_loss": 0.3196706175804138 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.3904937505722046, + "learning_rate": 2.9827002675162478e-05, + "loss": 0.2062, + "step": 7921, + "teacher_loss": 0.18577435612678528 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.2816625237464905, + "learning_rate": 2.9826658552961155e-05, + "loss": 0.232, + "step": 7922, + "teacher_loss": 0.22650131583213806 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.7039488554000854, + "learning_rate": 2.9826314090828997e-05, + "loss": 0.2693, + "step": 7923, + "teacher_loss": 0.22104613482952118 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.5160956978797913, + "learning_rate": 2.982596928877392e-05, + "loss": 0.2125, + "step": 7924, + "teacher_loss": 0.17876680195331573 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.3103431761264801, + "learning_rate": 2.9825624146803807e-05, + "loss": 0.2249, + "step": 7925, + "teacher_loss": 0.2153695821762085 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.45536044239997864, + "learning_rate": 2.9825278664926587e-05, + "loss": 0.2721, + "step": 7926, + "teacher_loss": 0.25169819593429565 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.42669031023979187, + "learning_rate": 2.9824932843150176e-05, + "loss": 0.2933, + "step": 7927, + "teacher_loss": 0.2784522771835327 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.23641017079353333, + "learning_rate": 2.9824586681482503e-05, + "loss": 0.2418, + "step": 7928, + "teacher_loss": 0.24239802360534668 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.257524698972702, + "learning_rate": 2.9824240179931503e-05, + "loss": 0.2315, + "step": 7929, + "teacher_loss": 0.22863918542861938 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.9684820175170898, + "learning_rate": 2.9823893338505118e-05, + "loss": 0.2808, + "step": 7930, + "teacher_loss": 0.20437420904636383 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.44187670946121216, + "learning_rate": 2.9823546157211303e-05, + "loss": 0.2328, + "step": 7931, + "teacher_loss": 0.2095218002796173 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.22811956703662872, + "learning_rate": 2.9823198636058023e-05, + "loss": 0.2742, + "step": 7932, + "teacher_loss": 0.27927637100219727 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.8179506063461304, + "learning_rate": 2.9822850775053238e-05, + "loss": 0.8288, + "step": 7933, + "teacher_loss": 0.8300375938415527 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.3418976664543152, + "learning_rate": 2.9822502574204926e-05, + "loss": 0.2335, + "step": 7934, + "teacher_loss": 0.22149452567100525 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.8048443794250488, + "learning_rate": 2.9822154033521073e-05, + "loss": 0.3877, + "step": 7935, + "teacher_loss": 0.34130239486694336 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.3619652986526489, + "learning_rate": 2.982180515300966e-05, + "loss": 0.2452, + "step": 7936, + "teacher_loss": 0.23223735392093658 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.6818567514419556, + "learning_rate": 2.9821455932678698e-05, + "loss": 0.5385, + "step": 7937, + "teacher_loss": 0.5225313305854797 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.33271127939224243, + "learning_rate": 2.9821106372536188e-05, + "loss": 0.2595, + "step": 7938, + "teacher_loss": 0.2513582110404968 + }, + { + "compression_loss": 0.0, + "epoch": 1.43, + "label_loss": 0.4131905138492584, + "learning_rate": 2.982075647259014e-05, + "loss": 0.1945, + "step": 7939, + "teacher_loss": 0.17024609446525574 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.27992552518844604, + "learning_rate": 2.9820406232848588e-05, + "loss": 0.2135, + "step": 7940, + "teacher_loss": 0.2061430811882019 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.12883847951889038, + "learning_rate": 2.9820055653319554e-05, + "loss": 0.2643, + "step": 7941, + "teacher_loss": 0.27935338020324707 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.42077189683914185, + "learning_rate": 2.9819704734011074e-05, + "loss": 0.3078, + "step": 7942, + "teacher_loss": 0.2952490448951721 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.4479777216911316, + "learning_rate": 2.98193534749312e-05, + "loss": 0.2641, + "step": 7943, + "teacher_loss": 0.24369552731513977 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.4536122977733612, + "learning_rate": 2.9819001876087976e-05, + "loss": 0.2769, + "step": 7944, + "teacher_loss": 0.2572559714317322 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.6863450407981873, + "learning_rate": 2.9818649937489473e-05, + "loss": 0.2883, + "step": 7945, + "teacher_loss": 0.24410021305084229 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.794481635093689, + "learning_rate": 2.9818297659143754e-05, + "loss": 0.3573, + "step": 7946, + "teacher_loss": 0.3086785078048706 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 1.0190421342849731, + "learning_rate": 2.9817945041058897e-05, + "loss": 0.6772, + "step": 7947, + "teacher_loss": 0.6392534971237183 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.7421325445175171, + "learning_rate": 2.9817592083242988e-05, + "loss": 0.4406, + "step": 7948, + "teacher_loss": 0.40705233812332153 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.5308027267456055, + "learning_rate": 2.9817238785704117e-05, + "loss": 0.2968, + "step": 7949, + "teacher_loss": 0.2708229422569275 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.38240405917167664, + "learning_rate": 2.9816885148450382e-05, + "loss": 0.3141, + "step": 7950, + "teacher_loss": 0.30646514892578125 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.2199104130268097, + "learning_rate": 2.98165311714899e-05, + "loss": 0.1864, + "step": 7951, + "teacher_loss": 0.18264034390449524 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.6833811402320862, + "learning_rate": 2.9816176854830775e-05, + "loss": 0.3225, + "step": 7952, + "teacher_loss": 0.28236451745033264 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.23944562673568726, + "learning_rate": 2.981582219848114e-05, + "loss": 0.192, + "step": 7953, + "teacher_loss": 0.18677642941474915 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.4406251311302185, + "learning_rate": 2.981546720244912e-05, + "loss": 0.3937, + "step": 7954, + "teacher_loss": 0.3884353041648865 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.4613821804523468, + "learning_rate": 2.9815111866742857e-05, + "loss": 0.3229, + "step": 7955, + "teacher_loss": 0.3075345754623413 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.6692880392074585, + "learning_rate": 2.9814756191370497e-05, + "loss": 0.2306, + "step": 7956, + "teacher_loss": 0.181819885969162 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.7955378293991089, + "learning_rate": 2.981440017634019e-05, + "loss": 0.2622, + "step": 7957, + "teacher_loss": 0.2028876692056656 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.47293224930763245, + "learning_rate": 2.981404382166011e-05, + "loss": 0.3613, + "step": 7958, + "teacher_loss": 0.34889277815818787 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.6163287162780762, + "learning_rate": 2.9813687127338417e-05, + "loss": 0.2774, + "step": 7959, + "teacher_loss": 0.2397189438343048 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.32954472303390503, + "learning_rate": 2.981333009338329e-05, + "loss": 0.2237, + "step": 7960, + "teacher_loss": 0.2118997424840927 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.7575712203979492, + "learning_rate": 2.981297271980292e-05, + "loss": 0.3376, + "step": 7961, + "teacher_loss": 0.290883868932724 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.37359052896499634, + "learning_rate": 2.9812615006605492e-05, + "loss": 0.2862, + "step": 7962, + "teacher_loss": 0.27650186419487 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.545821487903595, + "learning_rate": 2.9812256953799216e-05, + "loss": 0.21, + "step": 7963, + "teacher_loss": 0.17267107963562012 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.6322165727615356, + "learning_rate": 2.98118985613923e-05, + "loss": 0.5668, + "step": 7964, + "teacher_loss": 0.5595746040344238 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.7418209314346313, + "learning_rate": 2.9811539829392955e-05, + "loss": 0.4326, + "step": 7965, + "teacher_loss": 0.39820894598960876 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.3942718803882599, + "learning_rate": 2.981118075780941e-05, + "loss": 0.2968, + "step": 7966, + "teacher_loss": 0.2860025465488434 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.9183347225189209, + "learning_rate": 2.9810821346649894e-05, + "loss": 0.4469, + "step": 7967, + "teacher_loss": 0.39446988701820374 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.1843469887971878, + "learning_rate": 2.9810461595922653e-05, + "loss": 0.1771, + "step": 7968, + "teacher_loss": 0.1763322651386261 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.3399212062358856, + "learning_rate": 2.9810101505635932e-05, + "loss": 0.2654, + "step": 7969, + "teacher_loss": 0.2571086287498474 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.5177813172340393, + "learning_rate": 2.9809741075797982e-05, + "loss": 0.244, + "step": 7970, + "teacher_loss": 0.21362170577049255 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.5190725326538086, + "learning_rate": 2.9809380306417074e-05, + "loss": 0.2214, + "step": 7971, + "teacher_loss": 0.18837712705135345 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.5994565486907959, + "learning_rate": 2.9809019197501477e-05, + "loss": 0.2474, + "step": 7972, + "teacher_loss": 0.2082386165857315 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 1.2282960414886475, + "learning_rate": 2.9808657749059466e-05, + "loss": 0.4175, + "step": 7973, + "teacher_loss": 0.3274487257003784 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.4580203890800476, + "learning_rate": 2.9808295961099337e-05, + "loss": 0.3755, + "step": 7974, + "teacher_loss": 0.3663333058357239 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.36321693658828735, + "learning_rate": 2.9807933833629376e-05, + "loss": 0.2814, + "step": 7975, + "teacher_loss": 0.2722950279712677 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.3472549021244049, + "learning_rate": 2.980757136665789e-05, + "loss": 0.265, + "step": 7976, + "teacher_loss": 0.2558647394180298 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.6636838316917419, + "learning_rate": 2.9807208560193188e-05, + "loss": 0.2673, + "step": 7977, + "teacher_loss": 0.22326692938804626 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.2975034713745117, + "learning_rate": 2.9806845414243588e-05, + "loss": 0.1932, + "step": 7978, + "teacher_loss": 0.1816408634185791 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.4268759489059448, + "learning_rate": 2.9806481928817415e-05, + "loss": 0.2559, + "step": 7979, + "teacher_loss": 0.23685908317565918 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.5061647891998291, + "learning_rate": 2.9806118103923003e-05, + "loss": 0.2773, + "step": 7980, + "teacher_loss": 0.25187763571739197 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.6764490604400635, + "learning_rate": 2.980575393956869e-05, + "loss": 0.4442, + "step": 7981, + "teacher_loss": 0.41834330558776855 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.17975282669067383, + "learning_rate": 2.980538943576284e-05, + "loss": 0.1594, + "step": 7982, + "teacher_loss": 0.1571287214756012 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.8709560632705688, + "learning_rate": 2.9805024592513786e-05, + "loss": 0.2641, + "step": 7983, + "teacher_loss": 0.19664371013641357 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.5154306888580322, + "learning_rate": 2.9804659409829916e-05, + "loss": 0.3812, + "step": 7984, + "teacher_loss": 0.366298645734787 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.3517243564128876, + "learning_rate": 2.9804293887719588e-05, + "loss": 0.2012, + "step": 7985, + "teacher_loss": 0.184498131275177 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.8047251105308533, + "learning_rate": 2.9803928026191188e-05, + "loss": 0.2782, + "step": 7986, + "teacher_loss": 0.21974410116672516 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.3863036334514618, + "learning_rate": 2.98035618252531e-05, + "loss": 0.2414, + "step": 7987, + "teacher_loss": 0.22527649998664856 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.605420708656311, + "learning_rate": 2.980319528491373e-05, + "loss": 0.3649, + "step": 7988, + "teacher_loss": 0.33816656470298767 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.4079616069793701, + "learning_rate": 2.9802828405181468e-05, + "loss": 0.2325, + "step": 7989, + "teacher_loss": 0.2129765748977661 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.6925249695777893, + "learning_rate": 2.9802461186064734e-05, + "loss": 0.2933, + "step": 7990, + "teacher_loss": 0.24895219504833221 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.3809935450553894, + "learning_rate": 2.9802093627571943e-05, + "loss": 0.3306, + "step": 7991, + "teacher_loss": 0.3249974548816681 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.47359800338745117, + "learning_rate": 2.980172572971153e-05, + "loss": 0.2452, + "step": 7992, + "teacher_loss": 0.21978314220905304 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.2826244831085205, + "learning_rate": 2.980135749249192e-05, + "loss": 0.2041, + "step": 7993, + "teacher_loss": 0.19532069563865662 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.5362200140953064, + "learning_rate": 2.9800988915921553e-05, + "loss": 0.2948, + "step": 7994, + "teacher_loss": 0.2680273652076721 + }, + { + "compression_loss": 0.0, + "epoch": 1.44, + "label_loss": 0.2653762698173523, + "learning_rate": 2.9800620000008896e-05, + "loss": 0.1798, + "step": 7995, + "teacher_loss": 0.17032530903816223 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.5877405405044556, + "learning_rate": 2.9800250744762392e-05, + "loss": 0.3302, + "step": 7996, + "teacher_loss": 0.301611065864563 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.7703908681869507, + "learning_rate": 2.9799881150190516e-05, + "loss": 0.3748, + "step": 7997, + "teacher_loss": 0.3307967483997345 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.4675881862640381, + "learning_rate": 2.9799511216301733e-05, + "loss": 0.3589, + "step": 7998, + "teacher_loss": 0.3468117117881775 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.7271214723587036, + "learning_rate": 2.979914094310453e-05, + "loss": 0.2563, + "step": 7999, + "teacher_loss": 0.203986257314682 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.2982984781265259, + "learning_rate": 2.9798770330607393e-05, + "loss": 0.2335, + "step": 8000, + "teacher_loss": 0.2263200283050537 + }, + { + "epoch": 1.45, + "eval_exact_match": 79.29044465468307, + "eval_f1": 86.85736418641653, + "step": 8000 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.35950759053230286, + "learning_rate": 2.9798399378818826e-05, + "loss": 0.3886, + "step": 8001, + "teacher_loss": 0.39185500144958496 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.41313260793685913, + "learning_rate": 2.9798028087747323e-05, + "loss": 0.3724, + "step": 8002, + "teacher_loss": 0.36784258484840393 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.3457781970500946, + "learning_rate": 2.979765645740141e-05, + "loss": 0.291, + "step": 8003, + "teacher_loss": 0.28492817282676697 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.20942559838294983, + "learning_rate": 2.9797284487789594e-05, + "loss": 0.1923, + "step": 8004, + "teacher_loss": 0.190372496843338 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.16968312859535217, + "learning_rate": 2.9796912178920415e-05, + "loss": 0.1939, + "step": 8005, + "teacher_loss": 0.19657176733016968 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.8281893134117126, + "learning_rate": 2.97965395308024e-05, + "loss": 0.4514, + "step": 8006, + "teacher_loss": 0.4095839262008667 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.9052969217300415, + "learning_rate": 2.97961665434441e-05, + "loss": 0.3908, + "step": 8007, + "teacher_loss": 0.3336770534515381 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.2036474347114563, + "learning_rate": 2.9795793216854056e-05, + "loss": 0.2657, + "step": 8008, + "teacher_loss": 0.27257758378982544 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.3779197931289673, + "learning_rate": 2.9795419551040836e-05, + "loss": 0.3229, + "step": 8009, + "teacher_loss": 0.31682828068733215 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.49461743235588074, + "learning_rate": 2.9795045546013005e-05, + "loss": 0.2313, + "step": 8010, + "teacher_loss": 0.20204345881938934 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.526146411895752, + "learning_rate": 2.979467120177914e-05, + "loss": 0.2864, + "step": 8011, + "teacher_loss": 0.2597748041152954 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.15993425250053406, + "learning_rate": 2.9794296518347815e-05, + "loss": 0.1696, + "step": 8012, + "teacher_loss": 0.17064236104488373 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.26443496346473694, + "learning_rate": 2.9793921495727632e-05, + "loss": 0.305, + "step": 8013, + "teacher_loss": 0.30951380729675293 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.46183067560195923, + "learning_rate": 2.979354613392718e-05, + "loss": 0.2742, + "step": 8014, + "teacher_loss": 0.25333696603775024 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.4355916380882263, + "learning_rate": 2.9793170432955073e-05, + "loss": 0.2571, + "step": 8015, + "teacher_loss": 0.23730865120887756 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.3521590232849121, + "learning_rate": 2.9792794392819916e-05, + "loss": 0.309, + "step": 8016, + "teacher_loss": 0.3041537404060364 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.3283768594264984, + "learning_rate": 2.9792418013530334e-05, + "loss": 0.2285, + "step": 8017, + "teacher_loss": 0.21738946437835693 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.709922194480896, + "learning_rate": 2.979204129509496e-05, + "loss": 0.3488, + "step": 8018, + "teacher_loss": 0.30863460898399353 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.44409239292144775, + "learning_rate": 2.9791664237522427e-05, + "loss": 0.3341, + "step": 8019, + "teacher_loss": 0.3218342065811157 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.48729509115219116, + "learning_rate": 2.979128684082138e-05, + "loss": 0.2053, + "step": 8020, + "teacher_loss": 0.1739344298839569 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.699779212474823, + "learning_rate": 2.9790909105000472e-05, + "loss": 0.3144, + "step": 8021, + "teacher_loss": 0.27154403924942017 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.42885515093803406, + "learning_rate": 2.9790531030068365e-05, + "loss": 0.2706, + "step": 8022, + "teacher_loss": 0.2529875338077545 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.4884800910949707, + "learning_rate": 2.9790152616033727e-05, + "loss": 0.2615, + "step": 8023, + "teacher_loss": 0.23624378442764282 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.9685729146003723, + "learning_rate": 2.9789773862905227e-05, + "loss": 0.3838, + "step": 8024, + "teacher_loss": 0.31884828209877014 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.6472843885421753, + "learning_rate": 2.9789394770691562e-05, + "loss": 0.2591, + "step": 8025, + "teacher_loss": 0.2159425914287567 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.13992975652217865, + "learning_rate": 2.9789015339401413e-05, + "loss": 0.1855, + "step": 8026, + "teacher_loss": 0.19050821661949158 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.3546307682991028, + "learning_rate": 2.978863556904348e-05, + "loss": 0.3637, + "step": 8027, + "teacher_loss": 0.3646918833255768 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.4858000874519348, + "learning_rate": 2.9788255459626472e-05, + "loss": 0.2482, + "step": 8028, + "teacher_loss": 0.22182008624076843 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.19023367762565613, + "learning_rate": 2.978787501115911e-05, + "loss": 0.2932, + "step": 8029, + "teacher_loss": 0.3046276867389679 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.4192965030670166, + "learning_rate": 2.97874942236501e-05, + "loss": 0.3362, + "step": 8030, + "teacher_loss": 0.3269474506378174 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.37812745571136475, + "learning_rate": 2.978711309710819e-05, + "loss": 0.2376, + "step": 8031, + "teacher_loss": 0.2220381796360016 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 1.0065064430236816, + "learning_rate": 2.978673163154211e-05, + "loss": 0.3249, + "step": 8032, + "teacher_loss": 0.24918562173843384 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.8626583814620972, + "learning_rate": 2.9786349826960605e-05, + "loss": 0.4275, + "step": 8033, + "teacher_loss": 0.3791157305240631 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.7173159718513489, + "learning_rate": 2.978596768337243e-05, + "loss": 0.2839, + "step": 8034, + "teacher_loss": 0.23575535416603088 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.2525938153266907, + "learning_rate": 2.978558520078635e-05, + "loss": 0.2642, + "step": 8035, + "teacher_loss": 0.26552778482437134 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.19853433966636658, + "learning_rate": 2.9785202379211124e-05, + "loss": 0.1818, + "step": 8036, + "teacher_loss": 0.17988526821136475 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.5615202188491821, + "learning_rate": 2.978481921865554e-05, + "loss": 0.3308, + "step": 8037, + "teacher_loss": 0.3051198124885559 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.5207582712173462, + "learning_rate": 2.9784435719128375e-05, + "loss": 0.3242, + "step": 8038, + "teacher_loss": 0.30238497257232666 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.2839233875274658, + "learning_rate": 2.978405188063843e-05, + "loss": 0.2516, + "step": 8039, + "teacher_loss": 0.24800646305084229 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.40185031294822693, + "learning_rate": 2.97836677031945e-05, + "loss": 0.3105, + "step": 8040, + "teacher_loss": 0.3003424406051636 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.6808096766471863, + "learning_rate": 2.978328318680539e-05, + "loss": 0.3619, + "step": 8041, + "teacher_loss": 0.32644736766815186 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.7205904722213745, + "learning_rate": 2.978289833147992e-05, + "loss": 0.3355, + "step": 8042, + "teacher_loss": 0.29274511337280273 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.6800805330276489, + "learning_rate": 2.9782513137226914e-05, + "loss": 0.302, + "step": 8043, + "teacher_loss": 0.2600025534629822 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.15397977828979492, + "learning_rate": 2.9782127604055205e-05, + "loss": 0.2564, + "step": 8044, + "teacher_loss": 0.2677498757839203 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.3411473035812378, + "learning_rate": 2.9781741731973627e-05, + "loss": 0.2552, + "step": 8045, + "teacher_loss": 0.24565398693084717 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.5428531765937805, + "learning_rate": 2.9781355520991026e-05, + "loss": 0.2793, + "step": 8046, + "teacher_loss": 0.25003957748413086 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.30829495191574097, + "learning_rate": 2.978096897111626e-05, + "loss": 0.2833, + "step": 8047, + "teacher_loss": 0.2805267572402954 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.18221524357795715, + "learning_rate": 2.9780582082358194e-05, + "loss": 0.1702, + "step": 8048, + "teacher_loss": 0.16888168454170227 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.15790969133377075, + "learning_rate": 2.9780194854725694e-05, + "loss": 0.1699, + "step": 8049, + "teacher_loss": 0.17128121852874756 + }, + { + "compression_loss": 0.0, + "epoch": 1.45, + "label_loss": 0.31405192613601685, + "learning_rate": 2.9779807288227638e-05, + "loss": 0.2155, + "step": 8050, + "teacher_loss": 0.20452788472175598 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.41470450162887573, + "learning_rate": 2.977941938287292e-05, + "loss": 0.213, + "step": 8051, + "teacher_loss": 0.1905650496482849 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.18562032282352448, + "learning_rate": 2.977903113867042e-05, + "loss": 0.1772, + "step": 8052, + "teacher_loss": 0.17628324031829834 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.25806769728660583, + "learning_rate": 2.9778642555629044e-05, + "loss": 0.2223, + "step": 8053, + "teacher_loss": 0.21837911009788513 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.37394577264785767, + "learning_rate": 2.977825363375771e-05, + "loss": 0.2505, + "step": 8054, + "teacher_loss": 0.2368081510066986 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.23483368754386902, + "learning_rate": 2.977786437306532e-05, + "loss": 0.1828, + "step": 8055, + "teacher_loss": 0.1770137995481491 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.9039406776428223, + "learning_rate": 2.977747477356081e-05, + "loss": 0.5028, + "step": 8056, + "teacher_loss": 0.45827025175094604 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.40942177176475525, + "learning_rate": 2.9777084835253107e-05, + "loss": 0.2495, + "step": 8057, + "teacher_loss": 0.23175999522209167 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.24341200292110443, + "learning_rate": 2.9776694558151154e-05, + "loss": 0.3094, + "step": 8058, + "teacher_loss": 0.3167455494403839 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.3338346481323242, + "learning_rate": 2.97763039422639e-05, + "loss": 0.181, + "step": 8059, + "teacher_loss": 0.16400977969169617 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.5458928942680359, + "learning_rate": 2.9775912987600294e-05, + "loss": 0.2939, + "step": 8060, + "teacher_loss": 0.26589053869247437 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.43793976306915283, + "learning_rate": 2.9775521694169305e-05, + "loss": 0.2587, + "step": 8061, + "teacher_loss": 0.23878324031829834 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.45369377732276917, + "learning_rate": 2.9775130061979906e-05, + "loss": 0.277, + "step": 8062, + "teacher_loss": 0.25732752680778503 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.28746169805526733, + "learning_rate": 2.977473809104107e-05, + "loss": 0.2161, + "step": 8063, + "teacher_loss": 0.2081322968006134 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.48836231231689453, + "learning_rate": 2.9774345781361785e-05, + "loss": 0.2358, + "step": 8064, + "teacher_loss": 0.2077031433582306 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.6063824892044067, + "learning_rate": 2.977395313295105e-05, + "loss": 0.2857, + "step": 8065, + "teacher_loss": 0.2500288188457489 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.9142793416976929, + "learning_rate": 2.9773560145817863e-05, + "loss": 0.4095, + "step": 8066, + "teacher_loss": 0.3534258008003235 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.241891011595726, + "learning_rate": 2.9773166819971236e-05, + "loss": 0.2059, + "step": 8067, + "teacher_loss": 0.20194904506206512 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.5905392169952393, + "learning_rate": 2.9772773155420185e-05, + "loss": 0.3083, + "step": 8068, + "teacher_loss": 0.2769727408885956 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.4622535705566406, + "learning_rate": 2.9772379152173737e-05, + "loss": 0.2504, + "step": 8069, + "teacher_loss": 0.2268151491880417 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.3854491710662842, + "learning_rate": 2.977198481024092e-05, + "loss": 0.3092, + "step": 8070, + "teacher_loss": 0.30074018239974976 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.3202867805957794, + "learning_rate": 2.9771590129630787e-05, + "loss": 0.2392, + "step": 8071, + "teacher_loss": 0.23018880188465118 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.3210011124610901, + "learning_rate": 2.977119511035238e-05, + "loss": 0.2447, + "step": 8072, + "teacher_loss": 0.2362537682056427 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.3466269373893738, + "learning_rate": 2.9770799752414753e-05, + "loss": 0.2107, + "step": 8073, + "teacher_loss": 0.19565042853355408 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.6781039237976074, + "learning_rate": 2.9770404055826972e-05, + "loss": 0.2954, + "step": 8074, + "teacher_loss": 0.2528509795665741 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.38710817694664, + "learning_rate": 2.9770008020598113e-05, + "loss": 0.4723, + "step": 8075, + "teacher_loss": 0.4817160367965698 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.5930625796318054, + "learning_rate": 2.9769611646737252e-05, + "loss": 0.27, + "step": 8076, + "teacher_loss": 0.2340492457151413 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.942062497138977, + "learning_rate": 2.9769214934253476e-05, + "loss": 0.5507, + "step": 8077, + "teacher_loss": 0.5071681141853333 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.3186991214752197, + "learning_rate": 2.9768817883155882e-05, + "loss": 0.2878, + "step": 8078, + "teacher_loss": 0.28431880474090576 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.5798431634902954, + "learning_rate": 2.9768420493453574e-05, + "loss": 0.3197, + "step": 8079, + "teacher_loss": 0.2908373773097992 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.5199610590934753, + "learning_rate": 2.976802276515566e-05, + "loss": 0.2087, + "step": 8080, + "teacher_loss": 0.17414042353630066 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.4656398892402649, + "learning_rate": 2.9767624698271266e-05, + "loss": 0.3194, + "step": 8081, + "teacher_loss": 0.3031614422798157 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.7253479957580566, + "learning_rate": 2.9767226292809508e-05, + "loss": 0.2893, + "step": 8082, + "teacher_loss": 0.24085929989814758 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.7313870191574097, + "learning_rate": 2.9766827548779526e-05, + "loss": 0.3463, + "step": 8083, + "teacher_loss": 0.30356746912002563 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.44095784425735474, + "learning_rate": 2.976642846619046e-05, + "loss": 0.3108, + "step": 8084, + "teacher_loss": 0.296287477016449 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.21467992663383484, + "learning_rate": 2.976602904505147e-05, + "loss": 0.1758, + "step": 8085, + "teacher_loss": 0.17149582505226135 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.5262818336486816, + "learning_rate": 2.97656292853717e-05, + "loss": 0.4367, + "step": 8086, + "teacher_loss": 0.42671999335289 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.2502528727054596, + "learning_rate": 2.9765229187160316e-05, + "loss": 0.236, + "step": 8087, + "teacher_loss": 0.23438510298728943 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.6763976812362671, + "learning_rate": 2.97648287504265e-05, + "loss": 0.3472, + "step": 8088, + "teacher_loss": 0.31064045429229736 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.18361902236938477, + "learning_rate": 2.9764427975179423e-05, + "loss": 0.1687, + "step": 8089, + "teacher_loss": 0.16703499853610992 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.4738154411315918, + "learning_rate": 2.9764026861428282e-05, + "loss": 0.3118, + "step": 8090, + "teacher_loss": 0.29374629259109497 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 1.1757898330688477, + "learning_rate": 2.976362540918227e-05, + "loss": 0.3953, + "step": 8091, + "teacher_loss": 0.30856889486312866 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.18476903438568115, + "learning_rate": 2.976322361845059e-05, + "loss": 0.2344, + "step": 8092, + "teacher_loss": 0.23991002142429352 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.7978906631469727, + "learning_rate": 2.976282148924246e-05, + "loss": 0.3944, + "step": 8093, + "teacher_loss": 0.34960877895355225 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.4356808662414551, + "learning_rate": 2.976241902156709e-05, + "loss": 0.629, + "step": 8094, + "teacher_loss": 0.6504708528518677 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.26538991928100586, + "learning_rate": 2.976201621543371e-05, + "loss": 0.2495, + "step": 8095, + "teacher_loss": 0.2477041780948639 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 1.0113760232925415, + "learning_rate": 2.976161307085156e-05, + "loss": 0.4517, + "step": 8096, + "teacher_loss": 0.38949209451675415 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.5069959163665771, + "learning_rate": 2.9761209587829878e-05, + "loss": 0.2981, + "step": 8097, + "teacher_loss": 0.2749285101890564 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.5862365365028381, + "learning_rate": 2.9760805766377916e-05, + "loss": 0.3175, + "step": 8098, + "teacher_loss": 0.28762370347976685 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.7402926683425903, + "learning_rate": 2.9760401606504938e-05, + "loss": 0.7622, + "step": 8099, + "teacher_loss": 0.7646390199661255 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.14903312921524048, + "learning_rate": 2.9759997108220197e-05, + "loss": 0.1663, + "step": 8100, + "teacher_loss": 0.16824749112129211 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.19166401028633118, + "learning_rate": 2.9759592271532986e-05, + "loss": 0.1904, + "step": 8101, + "teacher_loss": 0.19031283259391785 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.5603183507919312, + "learning_rate": 2.9759187096452566e-05, + "loss": 0.3008, + "step": 8102, + "teacher_loss": 0.2719833254814148 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.3085275888442993, + "learning_rate": 2.975878158298824e-05, + "loss": 0.1975, + "step": 8103, + "teacher_loss": 0.1851949691772461 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.7284263372421265, + "learning_rate": 2.97583757311493e-05, + "loss": 0.2791, + "step": 8104, + "teacher_loss": 0.22922734916210175 + }, + { + "compression_loss": 0.0, + "epoch": 1.46, + "label_loss": 0.3495258390903473, + "learning_rate": 2.9757969540945056e-05, + "loss": 0.2671, + "step": 8105, + "teacher_loss": 0.257892906665802 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.2265910655260086, + "learning_rate": 2.9757563012384814e-05, + "loss": 0.2201, + "step": 8106, + "teacher_loss": 0.21936696767807007 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.5853474140167236, + "learning_rate": 2.97571561454779e-05, + "loss": 0.2899, + "step": 8107, + "teacher_loss": 0.25712692737579346 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.4107155203819275, + "learning_rate": 2.9756748940233636e-05, + "loss": 0.2454, + "step": 8108, + "teacher_loss": 0.22698035836219788 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.8423839807510376, + "learning_rate": 2.9756341396661366e-05, + "loss": 0.3601, + "step": 8109, + "teacher_loss": 0.3064751923084259 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.5393986701965332, + "learning_rate": 2.9755933514770428e-05, + "loss": 0.2565, + "step": 8110, + "teacher_loss": 0.22503072023391724 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 1.1462527513504028, + "learning_rate": 2.9755525294570173e-05, + "loss": 0.3114, + "step": 8111, + "teacher_loss": 0.21864129602909088 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.12924696505069733, + "learning_rate": 2.9755116736069964e-05, + "loss": 0.1881, + "step": 8112, + "teacher_loss": 0.19465124607086182 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.3674677610397339, + "learning_rate": 2.9754707839279164e-05, + "loss": 0.2111, + "step": 8113, + "teacher_loss": 0.19370847940444946 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.9885168075561523, + "learning_rate": 2.9754298604207157e-05, + "loss": 0.3503, + "step": 8114, + "teacher_loss": 0.27939724922180176 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.26287201046943665, + "learning_rate": 2.9753889030863312e-05, + "loss": 0.2467, + "step": 8115, + "teacher_loss": 0.24495232105255127 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.3919248580932617, + "learning_rate": 2.975347911925703e-05, + "loss": 0.275, + "step": 8116, + "teacher_loss": 0.26203691959381104 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.28359806537628174, + "learning_rate": 2.97530688693977e-05, + "loss": 0.2217, + "step": 8117, + "teacher_loss": 0.21486003696918488 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.35373297333717346, + "learning_rate": 2.9752658281294735e-05, + "loss": 0.2697, + "step": 8118, + "teacher_loss": 0.26036337018013 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.6003345847129822, + "learning_rate": 2.975224735495755e-05, + "loss": 0.3378, + "step": 8119, + "teacher_loss": 0.30861300230026245 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.26357048749923706, + "learning_rate": 2.975183609039556e-05, + "loss": 0.1683, + "step": 8120, + "teacher_loss": 0.15775799751281738 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.4701172709465027, + "learning_rate": 2.9751424487618196e-05, + "loss": 0.2958, + "step": 8121, + "teacher_loss": 0.2764098048210144 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.423725962638855, + "learning_rate": 2.9751012546634898e-05, + "loss": 0.3679, + "step": 8122, + "teacher_loss": 0.36166954040527344 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.37218284606933594, + "learning_rate": 2.9750600267455104e-05, + "loss": 0.1884, + "step": 8123, + "teacher_loss": 0.1679789125919342 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.28768423199653625, + "learning_rate": 2.9750187650088276e-05, + "loss": 0.2168, + "step": 8124, + "teacher_loss": 0.20887160301208496 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.6872828602790833, + "learning_rate": 2.9749774694543862e-05, + "loss": 0.327, + "step": 8125, + "teacher_loss": 0.2869381010532379 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.32114049792289734, + "learning_rate": 2.9749361400831342e-05, + "loss": 0.3068, + "step": 8126, + "teacher_loss": 0.30523765087127686 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.35265010595321655, + "learning_rate": 2.9748947768960183e-05, + "loss": 0.2821, + "step": 8127, + "teacher_loss": 0.27425771951675415 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.5579265356063843, + "learning_rate": 2.9748533798939872e-05, + "loss": 0.2598, + "step": 8128, + "teacher_loss": 0.22672465443611145 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.15026262402534485, + "learning_rate": 2.9748119490779898e-05, + "loss": 0.189, + "step": 8129, + "teacher_loss": 0.1933566927909851 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.4982910752296448, + "learning_rate": 2.9747704844489762e-05, + "loss": 0.2855, + "step": 8130, + "teacher_loss": 0.26184138655662537 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.29063814878463745, + "learning_rate": 2.9747289860078975e-05, + "loss": 0.3159, + "step": 8131, + "teacher_loss": 0.31875449419021606 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.5790538191795349, + "learning_rate": 2.974687453755704e-05, + "loss": 0.2737, + "step": 8132, + "teacher_loss": 0.23976552486419678 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.5124067068099976, + "learning_rate": 2.9746458876933487e-05, + "loss": 0.2397, + "step": 8133, + "teacher_loss": 0.20935088396072388 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.2579716444015503, + "learning_rate": 2.9746042878217845e-05, + "loss": 0.2058, + "step": 8134, + "teacher_loss": 0.20004823803901672 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.29261016845703125, + "learning_rate": 2.9745626541419648e-05, + "loss": 0.2903, + "step": 8135, + "teacher_loss": 0.2900722324848175 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.35633546113967896, + "learning_rate": 2.974520986654845e-05, + "loss": 0.1845, + "step": 8136, + "teacher_loss": 0.16537439823150635 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.33052337169647217, + "learning_rate": 2.9744792853613792e-05, + "loss": 0.2629, + "step": 8137, + "teacher_loss": 0.25535738468170166 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.24920472502708435, + "learning_rate": 2.9744375502625242e-05, + "loss": 0.2341, + "step": 8138, + "teacher_loss": 0.23240327835083008 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.423315167427063, + "learning_rate": 2.974395781359237e-05, + "loss": 0.39, + "step": 8139, + "teacher_loss": 0.3863159418106079 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.30177706480026245, + "learning_rate": 2.9743539786524746e-05, + "loss": 0.2877, + "step": 8140, + "teacher_loss": 0.2861484885215759 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.5094630718231201, + "learning_rate": 2.9743121421431963e-05, + "loss": 0.3482, + "step": 8141, + "teacher_loss": 0.3303123116493225 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.5499067306518555, + "learning_rate": 2.9742702718323605e-05, + "loss": 0.2394, + "step": 8142, + "teacher_loss": 0.20485320687294006 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.5517386794090271, + "learning_rate": 2.9742283677209277e-05, + "loss": 0.2111, + "step": 8143, + "teacher_loss": 0.17326796054840088 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.48349255323410034, + "learning_rate": 2.9741864298098584e-05, + "loss": 0.3237, + "step": 8144, + "teacher_loss": 0.3059951066970825 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.25946253538131714, + "learning_rate": 2.9741444581001135e-05, + "loss": 0.1857, + "step": 8145, + "teacher_loss": 0.17752626538276672 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.8467333316802979, + "learning_rate": 2.9741024525926566e-05, + "loss": 0.7973, + "step": 8146, + "teacher_loss": 0.7918380498886108 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.21400555968284607, + "learning_rate": 2.97406041328845e-05, + "loss": 0.1724, + "step": 8147, + "teacher_loss": 0.16774117946624756 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.8075433969497681, + "learning_rate": 2.974018340188457e-05, + "loss": 0.4414, + "step": 8148, + "teacher_loss": 0.4006918668746948 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.1271267980337143, + "learning_rate": 2.9739762332936433e-05, + "loss": 0.1604, + "step": 8149, + "teacher_loss": 0.16413989663124084 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.41097894310951233, + "learning_rate": 2.9739340926049738e-05, + "loss": 0.3176, + "step": 8150, + "teacher_loss": 0.30727115273475647 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.7365285158157349, + "learning_rate": 2.9738919181234144e-05, + "loss": 0.8734, + "step": 8151, + "teacher_loss": 0.8885741233825684 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.6470263600349426, + "learning_rate": 2.9738497098499325e-05, + "loss": 0.405, + "step": 8152, + "teacher_loss": 0.3781614303588867 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.470409631729126, + "learning_rate": 2.9738074677854956e-05, + "loss": 0.2903, + "step": 8153, + "teacher_loss": 0.2702961564064026 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.4736995995044708, + "learning_rate": 2.9737651919310717e-05, + "loss": 0.2373, + "step": 8154, + "teacher_loss": 0.21105676889419556 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.4125184118747711, + "learning_rate": 2.973722882287631e-05, + "loss": 0.2899, + "step": 8155, + "teacher_loss": 0.2762352228164673 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.6273418664932251, + "learning_rate": 2.973680538856143e-05, + "loss": 0.3912, + "step": 8156, + "teacher_loss": 0.3649166226387024 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.3285972476005554, + "learning_rate": 2.9736381616375783e-05, + "loss": 0.3322, + "step": 8157, + "teacher_loss": 0.3325648903846741 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.16893085837364197, + "learning_rate": 2.973595750632909e-05, + "loss": 0.2054, + "step": 8158, + "teacher_loss": 0.20939815044403076 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.3801194727420807, + "learning_rate": 2.9735533058431074e-05, + "loss": 0.3053, + "step": 8159, + "teacher_loss": 0.29703962802886963 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.3367338478565216, + "learning_rate": 2.973510827269146e-05, + "loss": 0.1613, + "step": 8160, + "teacher_loss": 0.14180812239646912 + }, + { + "compression_loss": 0.0, + "epoch": 1.47, + "label_loss": 0.30335402488708496, + "learning_rate": 2.973468314911999e-05, + "loss": 0.1941, + "step": 8161, + "teacher_loss": 0.18200674653053284 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.3989580273628235, + "learning_rate": 2.9734257687726416e-05, + "loss": 0.246, + "step": 8162, + "teacher_loss": 0.22901788353919983 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.7661243677139282, + "learning_rate": 2.973383188852049e-05, + "loss": 0.4453, + "step": 8163, + "teacher_loss": 0.4096333980560303 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.3543039858341217, + "learning_rate": 2.9733405751511974e-05, + "loss": 0.3041, + "step": 8164, + "teacher_loss": 0.29856154322624207 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.722306489944458, + "learning_rate": 2.973297927671063e-05, + "loss": 0.304, + "step": 8165, + "teacher_loss": 0.2574799060821533 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.3803054392337799, + "learning_rate": 2.973255246412625e-05, + "loss": 0.145, + "step": 8166, + "teacher_loss": 0.11884692311286926 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.776046633720398, + "learning_rate": 2.9732125313768608e-05, + "loss": 0.283, + "step": 8167, + "teacher_loss": 0.22817113995552063 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.4325186014175415, + "learning_rate": 2.9731697825647504e-05, + "loss": 0.266, + "step": 8168, + "teacher_loss": 0.24755185842514038 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.14022308588027954, + "learning_rate": 2.9731269999772737e-05, + "loss": 0.1851, + "step": 8169, + "teacher_loss": 0.190038800239563 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.3932662010192871, + "learning_rate": 2.9730841836154116e-05, + "loss": 0.338, + "step": 8170, + "teacher_loss": 0.33188700675964355 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.5001851916313171, + "learning_rate": 2.9730413334801456e-05, + "loss": 0.2834, + "step": 8171, + "teacher_loss": 0.25927141308784485 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.6095743179321289, + "learning_rate": 2.972998449572458e-05, + "loss": 0.3354, + "step": 8172, + "teacher_loss": 0.3048837184906006 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.7760735154151917, + "learning_rate": 2.9729555318933327e-05, + "loss": 0.5297, + "step": 8173, + "teacher_loss": 0.502333402633667 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.4768903851509094, + "learning_rate": 2.9729125804437532e-05, + "loss": 0.2944, + "step": 8174, + "teacher_loss": 0.27413681149482727 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.39413008093833923, + "learning_rate": 2.9728695952247035e-05, + "loss": 0.257, + "step": 8175, + "teacher_loss": 0.24179138243198395 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.13885346055030823, + "learning_rate": 2.9728265762371708e-05, + "loss": 0.1566, + "step": 8176, + "teacher_loss": 0.15854905545711517 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.9335932731628418, + "learning_rate": 2.97278352348214e-05, + "loss": 0.4126, + "step": 8177, + "teacher_loss": 0.35468918085098267 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.6343482732772827, + "learning_rate": 2.9727404369605985e-05, + "loss": 0.2856, + "step": 8178, + "teacher_loss": 0.24679480493068695 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.2928655743598938, + "learning_rate": 2.9726973166735347e-05, + "loss": 0.1676, + "step": 8179, + "teacher_loss": 0.1536298543214798 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.2646465599536896, + "learning_rate": 2.9726541626219365e-05, + "loss": 0.2151, + "step": 8180, + "teacher_loss": 0.2095642387866974 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.216688334941864, + "learning_rate": 2.9726109748067936e-05, + "loss": 0.2873, + "step": 8181, + "teacher_loss": 0.29512155055999756 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.6062241792678833, + "learning_rate": 2.9725677532290964e-05, + "loss": 0.2437, + "step": 8182, + "teacher_loss": 0.20343399047851562 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.20176056027412415, + "learning_rate": 2.9725244978898354e-05, + "loss": 0.2332, + "step": 8183, + "teacher_loss": 0.23671194911003113 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.5101134777069092, + "learning_rate": 2.9724812087900024e-05, + "loss": 0.3013, + "step": 8184, + "teacher_loss": 0.278059184551239 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.6626385450363159, + "learning_rate": 2.9724378859305905e-05, + "loss": 0.4386, + "step": 8185, + "teacher_loss": 0.41375815868377686 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.34106212854385376, + "learning_rate": 2.972394529312592e-05, + "loss": 0.2231, + "step": 8186, + "teacher_loss": 0.20995593070983887 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.7625394463539124, + "learning_rate": 2.9723511389370015e-05, + "loss": 0.5808, + "step": 8187, + "teacher_loss": 0.5606253147125244 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.39818793535232544, + "learning_rate": 2.9723077148048143e-05, + "loss": 0.2098, + "step": 8188, + "teacher_loss": 0.18884649872779846 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.703392505645752, + "learning_rate": 2.9722642569170244e-05, + "loss": 0.2159, + "step": 8189, + "teacher_loss": 0.16175755858421326 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.2669304609298706, + "learning_rate": 2.9722207652746297e-05, + "loss": 0.1991, + "step": 8190, + "teacher_loss": 0.1916126012802124 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.4417065680027008, + "learning_rate": 2.972177239878627e-05, + "loss": 0.2649, + "step": 8191, + "teacher_loss": 0.2452913224697113 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.12884913384914398, + "learning_rate": 2.9721336807300134e-05, + "loss": 0.1785, + "step": 8192, + "teacher_loss": 0.18400683999061584 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.8325263261795044, + "learning_rate": 2.9720900878297887e-05, + "loss": 0.6302, + "step": 8193, + "teacher_loss": 0.6077010631561279 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.9836317896842957, + "learning_rate": 2.972046461178952e-05, + "loss": 0.366, + "step": 8194, + "teacher_loss": 0.29736989736557007 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.37794429063796997, + "learning_rate": 2.972002800778503e-05, + "loss": 0.2983, + "step": 8195, + "teacher_loss": 0.2894938588142395 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.539686381816864, + "learning_rate": 2.971959106629443e-05, + "loss": 0.2719, + "step": 8196, + "teacher_loss": 0.24218055605888367 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.2979428768157959, + "learning_rate": 2.971915378732774e-05, + "loss": 0.2508, + "step": 8197, + "teacher_loss": 0.2455427348613739 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.8227146863937378, + "learning_rate": 2.9718716170894987e-05, + "loss": 0.3418, + "step": 8198, + "teacher_loss": 0.28837132453918457 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.3547306954860687, + "learning_rate": 2.97182782170062e-05, + "loss": 0.2315, + "step": 8199, + "teacher_loss": 0.21777455508708954 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.3757646083831787, + "learning_rate": 2.9717839925671414e-05, + "loss": 0.3293, + "step": 8200, + "teacher_loss": 0.3241514265537262 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.6775994300842285, + "learning_rate": 2.971740129690069e-05, + "loss": 0.271, + "step": 8201, + "teacher_loss": 0.22585608065128326 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.693859338760376, + "learning_rate": 2.971696233070408e-05, + "loss": 0.3591, + "step": 8202, + "teacher_loss": 0.32188284397125244 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.4581086039543152, + "learning_rate": 2.971652302709165e-05, + "loss": 0.2462, + "step": 8203, + "teacher_loss": 0.22261425852775574 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.4841664135456085, + "learning_rate": 2.9716083386073465e-05, + "loss": 0.3256, + "step": 8204, + "teacher_loss": 0.3080095052719116 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.30393919348716736, + "learning_rate": 2.971564340765961e-05, + "loss": 0.2104, + "step": 8205, + "teacher_loss": 0.2000046968460083 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.2805207073688507, + "learning_rate": 2.9715203091860172e-05, + "loss": 0.1824, + "step": 8206, + "teacher_loss": 0.1715065836906433 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.3695563077926636, + "learning_rate": 2.9714762438685246e-05, + "loss": 0.2731, + "step": 8207, + "teacher_loss": 0.26233798265457153 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.6023653745651245, + "learning_rate": 2.9714321448144934e-05, + "loss": 0.4234, + "step": 8208, + "teacher_loss": 0.40354806184768677 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.6006008386611938, + "learning_rate": 2.9713880120249346e-05, + "loss": 0.4396, + "step": 8209, + "teacher_loss": 0.421748548746109 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.5437072515487671, + "learning_rate": 2.9713438455008603e-05, + "loss": 0.2636, + "step": 8210, + "teacher_loss": 0.23248618841171265 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.6870304346084595, + "learning_rate": 2.9712996452432827e-05, + "loss": 0.5096, + "step": 8211, + "teacher_loss": 0.48989659547805786 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.2999982237815857, + "learning_rate": 2.9712554112532157e-05, + "loss": 0.2418, + "step": 8212, + "teacher_loss": 0.23538298904895782 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.639070451259613, + "learning_rate": 2.971211143531673e-05, + "loss": 0.2903, + "step": 8213, + "teacher_loss": 0.2515646815299988 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.38200056552886963, + "learning_rate": 2.97116684207967e-05, + "loss": 0.2586, + "step": 8214, + "teacher_loss": 0.24486488103866577 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.21200186014175415, + "learning_rate": 2.9711225068982217e-05, + "loss": 0.1455, + "step": 8215, + "teacher_loss": 0.1380728930234909 + }, + { + "compression_loss": 0.0, + "epoch": 1.48, + "label_loss": 0.5590644478797913, + "learning_rate": 2.971078137988345e-05, + "loss": 0.2253, + "step": 8216, + "teacher_loss": 0.18820244073867798 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.2474207580089569, + "learning_rate": 2.9710337353510573e-05, + "loss": 0.1786, + "step": 8217, + "teacher_loss": 0.17090237140655518 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.7600335478782654, + "learning_rate": 2.9709892989873764e-05, + "loss": 0.2962, + "step": 8218, + "teacher_loss": 0.2446936070919037 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.3686853051185608, + "learning_rate": 2.970944828898321e-05, + "loss": 0.318, + "step": 8219, + "teacher_loss": 0.31234419345855713 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.42755141854286194, + "learning_rate": 2.970900325084911e-05, + "loss": 0.2396, + "step": 8220, + "teacher_loss": 0.21868109703063965 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.31360459327697754, + "learning_rate": 2.9708557875481665e-05, + "loss": 0.2491, + "step": 8221, + "teacher_loss": 0.24194639921188354 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.4301757216453552, + "learning_rate": 2.9708112162891084e-05, + "loss": 0.3686, + "step": 8222, + "teacher_loss": 0.3617165982723236 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.8100927472114563, + "learning_rate": 2.9707666113087593e-05, + "loss": 1.001, + "step": 8223, + "teacher_loss": 1.0221672058105469 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.5292739868164062, + "learning_rate": 2.970721972608141e-05, + "loss": 0.3204, + "step": 8224, + "teacher_loss": 0.2972298860549927 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.8262772560119629, + "learning_rate": 2.9706773001882772e-05, + "loss": 0.3199, + "step": 8225, + "teacher_loss": 0.26363474130630493 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.3406209349632263, + "learning_rate": 2.9706325940501927e-05, + "loss": 0.1973, + "step": 8226, + "teacher_loss": 0.18135693669319153 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.7358806133270264, + "learning_rate": 2.9705878541949118e-05, + "loss": 0.3657, + "step": 8227, + "teacher_loss": 0.32459670305252075 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.8401494026184082, + "learning_rate": 2.9705430806234603e-05, + "loss": 0.4209, + "step": 8228, + "teacher_loss": 0.37436389923095703 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.18290647864341736, + "learning_rate": 2.970498273336865e-05, + "loss": 0.2203, + "step": 8229, + "teacher_loss": 0.22450795769691467 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.41541701555252075, + "learning_rate": 2.970453432336153e-05, + "loss": 0.2077, + "step": 8230, + "teacher_loss": 0.18463781476020813 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.766947865486145, + "learning_rate": 2.970408557622353e-05, + "loss": 0.5848, + "step": 8231, + "teacher_loss": 0.5645679235458374 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.7842399477958679, + "learning_rate": 2.9703636491964925e-05, + "loss": 0.3656, + "step": 8232, + "teacher_loss": 0.31905221939086914 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.2675301134586334, + "learning_rate": 2.970318707059602e-05, + "loss": 0.2196, + "step": 8233, + "teacher_loss": 0.21426743268966675 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.4327450096607208, + "learning_rate": 2.970273731212712e-05, + "loss": 0.2917, + "step": 8234, + "teacher_loss": 0.27598896622657776 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.38992100954055786, + "learning_rate": 2.9702287216568537e-05, + "loss": 0.3075, + "step": 8235, + "teacher_loss": 0.2983550727367401 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.6753556132316589, + "learning_rate": 2.9701836783930584e-05, + "loss": 0.2496, + "step": 8236, + "teacher_loss": 0.20231623947620392 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.9020670652389526, + "learning_rate": 2.9701386014223592e-05, + "loss": 0.325, + "step": 8237, + "teacher_loss": 0.26086899638175964 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.3401253819465637, + "learning_rate": 2.9700934907457898e-05, + "loss": 0.2299, + "step": 8238, + "teacher_loss": 0.21763408184051514 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.18903255462646484, + "learning_rate": 2.9700483463643838e-05, + "loss": 0.2491, + "step": 8239, + "teacher_loss": 0.2557646334171295 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.38801127672195435, + "learning_rate": 2.970003168279177e-05, + "loss": 0.2433, + "step": 8240, + "teacher_loss": 0.22721263766288757 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.5279019474983215, + "learning_rate": 2.9699579564912048e-05, + "loss": 0.2758, + "step": 8241, + "teacher_loss": 0.2477797120809555 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.4817383587360382, + "learning_rate": 2.969912711001504e-05, + "loss": 0.228, + "step": 8242, + "teacher_loss": 0.19977101683616638 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.544009804725647, + "learning_rate": 2.9698674318111117e-05, + "loss": 0.2635, + "step": 8243, + "teacher_loss": 0.23233303427696228 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.2647302448749542, + "learning_rate": 2.969822118921066e-05, + "loss": 0.2289, + "step": 8244, + "teacher_loss": 0.2248658835887909 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 1.121783971786499, + "learning_rate": 2.9697767723324058e-05, + "loss": 0.2478, + "step": 8245, + "teacher_loss": 0.1507301926612854 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.6131390333175659, + "learning_rate": 2.9697313920461708e-05, + "loss": 0.3032, + "step": 8246, + "teacher_loss": 0.26878905296325684 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.3602389395236969, + "learning_rate": 2.9696859780634016e-05, + "loss": 0.1962, + "step": 8247, + "teacher_loss": 0.1779191493988037 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.8005020022392273, + "learning_rate": 2.9696405303851392e-05, + "loss": 0.3767, + "step": 8248, + "teacher_loss": 0.32958823442459106 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.2803199589252472, + "learning_rate": 2.9695950490124256e-05, + "loss": 0.2144, + "step": 8249, + "teacher_loss": 0.20709499716758728 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 1.0980370044708252, + "learning_rate": 2.9695495339463036e-05, + "loss": 1.0336, + "step": 8250, + "teacher_loss": 1.0264418125152588 + }, + { + "epoch": 1.49, + "eval_exact_match": 79.13907284768212, + "eval_f1": 86.71594304916012, + "step": 8250 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.2720036804676056, + "learning_rate": 2.969503985187817e-05, + "loss": 0.219, + "step": 8251, + "teacher_loss": 0.21315529942512512 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.3008033037185669, + "learning_rate": 2.9694584027380094e-05, + "loss": 0.2526, + "step": 8252, + "teacher_loss": 0.2472873032093048 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.48980453610420227, + "learning_rate": 2.9694127865979266e-05, + "loss": 0.3265, + "step": 8253, + "teacher_loss": 0.3083362877368927 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.6193699836730957, + "learning_rate": 2.969367136768614e-05, + "loss": 0.3883, + "step": 8254, + "teacher_loss": 0.3626496493816376 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.6130605936050415, + "learning_rate": 2.9693214532511183e-05, + "loss": 0.365, + "step": 8255, + "teacher_loss": 0.33739152550697327 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.6840561628341675, + "learning_rate": 2.969275736046487e-05, + "loss": 0.3971, + "step": 8256, + "teacher_loss": 0.36526957154273987 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.5802739262580872, + "learning_rate": 2.9692299851557686e-05, + "loss": 0.2526, + "step": 8257, + "teacher_loss": 0.2161685675382614 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.11517634987831116, + "learning_rate": 2.9691842005800113e-05, + "loss": 0.1811, + "step": 8258, + "teacher_loss": 0.18841925263404846 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.5294514298439026, + "learning_rate": 2.969138382320265e-05, + "loss": 0.2351, + "step": 8259, + "teacher_loss": 0.20243753492832184 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.6079145669937134, + "learning_rate": 2.9690925303775802e-05, + "loss": 0.3257, + "step": 8260, + "teacher_loss": 0.2943679392337799 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.867706298828125, + "learning_rate": 2.969046644753008e-05, + "loss": 0.3728, + "step": 8261, + "teacher_loss": 0.31786417961120605 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.35639429092407227, + "learning_rate": 2.9690007254476014e-05, + "loss": 0.283, + "step": 8262, + "teacher_loss": 0.27480629086494446 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.8812310695648193, + "learning_rate": 2.968954772462412e-05, + "loss": 0.3739, + "step": 8263, + "teacher_loss": 0.31751763820648193 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.2417874038219452, + "learning_rate": 2.9689087857984942e-05, + "loss": 0.2625, + "step": 8264, + "teacher_loss": 0.2648100256919861 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.775303840637207, + "learning_rate": 2.9688627654569013e-05, + "loss": 0.312, + "step": 8265, + "teacher_loss": 0.2605516016483307 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.5896140336990356, + "learning_rate": 2.9688167114386898e-05, + "loss": 0.3523, + "step": 8266, + "teacher_loss": 0.3259405195713043 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.3817254900932312, + "learning_rate": 2.9687706237449143e-05, + "loss": 0.2645, + "step": 8267, + "teacher_loss": 0.2514649033546448 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.5383740663528442, + "learning_rate": 2.968724502376632e-05, + "loss": 0.2871, + "step": 8268, + "teacher_loss": 0.259149432182312 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.5733349919319153, + "learning_rate": 2.9686783473349006e-05, + "loss": 0.32, + "step": 8269, + "teacher_loss": 0.2918621599674225 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.52850741147995, + "learning_rate": 2.968632158620778e-05, + "loss": 0.2562, + "step": 8270, + "teacher_loss": 0.2259790599346161 + }, + { + "compression_loss": 0.0, + "epoch": 1.49, + "label_loss": 0.29134970903396606, + "learning_rate": 2.9685859362353235e-05, + "loss": 0.2619, + "step": 8271, + "teacher_loss": 0.2586093544960022 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.19141846895217896, + "learning_rate": 2.968539680179596e-05, + "loss": 0.2085, + "step": 8272, + "teacher_loss": 0.2104235589504242 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.5316446423530579, + "learning_rate": 2.9684933904546564e-05, + "loss": 0.2964, + "step": 8273, + "teacher_loss": 0.27026551961898804 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.28756818175315857, + "learning_rate": 2.9684470670615667e-05, + "loss": 0.3136, + "step": 8274, + "teacher_loss": 0.316439151763916 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.4492422044277191, + "learning_rate": 2.968400710001388e-05, + "loss": 0.3842, + "step": 8275, + "teacher_loss": 0.3769516944885254 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.6005218029022217, + "learning_rate": 2.9683543192751836e-05, + "loss": 0.2991, + "step": 8276, + "teacher_loss": 0.2656573951244354 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.47699397802352905, + "learning_rate": 2.9683078948840175e-05, + "loss": 0.24, + "step": 8277, + "teacher_loss": 0.21366241574287415 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.5211970210075378, + "learning_rate": 2.968261436828953e-05, + "loss": 0.3075, + "step": 8278, + "teacher_loss": 0.28372249007225037 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.6410287618637085, + "learning_rate": 2.968214945111056e-05, + "loss": 0.2762, + "step": 8279, + "teacher_loss": 0.2356693595647812 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.676201343536377, + "learning_rate": 2.968168419731392e-05, + "loss": 0.3573, + "step": 8280, + "teacher_loss": 0.3219008445739746 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.26739197969436646, + "learning_rate": 2.9681218606910283e-05, + "loss": 0.1846, + "step": 8281, + "teacher_loss": 0.17536070942878723 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.49219387769699097, + "learning_rate": 2.968075267991032e-05, + "loss": 0.3952, + "step": 8282, + "teacher_loss": 0.38438600301742554 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.21525005996227264, + "learning_rate": 2.968028641632471e-05, + "loss": 0.2165, + "step": 8283, + "teacher_loss": 0.21665681898593903 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.27386558055877686, + "learning_rate": 2.9679819816164148e-05, + "loss": 0.2375, + "step": 8284, + "teacher_loss": 0.2335040271282196 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.4536552131175995, + "learning_rate": 2.9679352879439328e-05, + "loss": 0.2979, + "step": 8285, + "teacher_loss": 0.2805665135383606 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.2945255935192108, + "learning_rate": 2.9678885606160962e-05, + "loss": 0.2119, + "step": 8286, + "teacher_loss": 0.20273634791374207 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.3294672966003418, + "learning_rate": 2.9678417996339757e-05, + "loss": 0.2745, + "step": 8287, + "teacher_loss": 0.26834189891815186 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.40236687660217285, + "learning_rate": 2.9677950049986432e-05, + "loss": 0.2546, + "step": 8288, + "teacher_loss": 0.23822659254074097 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.9912687540054321, + "learning_rate": 2.967748176711172e-05, + "loss": 0.37, + "step": 8289, + "teacher_loss": 0.30093270540237427 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.4055962860584259, + "learning_rate": 2.967701314772636e-05, + "loss": 0.2984, + "step": 8290, + "teacher_loss": 0.2865150272846222 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.5629176497459412, + "learning_rate": 2.9676544191841094e-05, + "loss": 0.3079, + "step": 8291, + "teacher_loss": 0.2795999050140381 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.44685113430023193, + "learning_rate": 2.9676074899466666e-05, + "loss": 0.2393, + "step": 8292, + "teacher_loss": 0.21621333062648773 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.46933919191360474, + "learning_rate": 2.9675605270613845e-05, + "loss": 0.3014, + "step": 8293, + "teacher_loss": 0.28272271156311035 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.8725112676620483, + "learning_rate": 2.9675135305293394e-05, + "loss": 0.7238, + "step": 8294, + "teacher_loss": 0.7072983980178833 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.39136794209480286, + "learning_rate": 2.967466500351609e-05, + "loss": 0.2694, + "step": 8295, + "teacher_loss": 0.25588706135749817 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.6513285636901855, + "learning_rate": 2.967419436529271e-05, + "loss": 0.2742, + "step": 8296, + "teacher_loss": 0.23228520154953003 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.38436412811279297, + "learning_rate": 2.9673723390634055e-05, + "loss": 0.2534, + "step": 8297, + "teacher_loss": 0.23882606625556946 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.4502195715904236, + "learning_rate": 2.9673252079550917e-05, + "loss": 0.5762, + "step": 8298, + "teacher_loss": 0.5901873707771301 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.5212042331695557, + "learning_rate": 2.9672780432054095e-05, + "loss": 0.2499, + "step": 8299, + "teacher_loss": 0.21971049904823303 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.7054161429405212, + "learning_rate": 2.9672308448154415e-05, + "loss": 0.3212, + "step": 8300, + "teacher_loss": 0.278546005487442 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.3220503032207489, + "learning_rate": 2.967183612786269e-05, + "loss": 0.2564, + "step": 8301, + "teacher_loss": 0.24906158447265625 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.35697615146636963, + "learning_rate": 2.967136347118975e-05, + "loss": 0.2163, + "step": 8302, + "teacher_loss": 0.20071209967136383 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.7200969457626343, + "learning_rate": 2.967089047814643e-05, + "loss": 0.3181, + "step": 8303, + "teacher_loss": 0.273413747549057 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.44575589895248413, + "learning_rate": 2.9670417148743586e-05, + "loss": 0.2111, + "step": 8304, + "teacher_loss": 0.1850753277540207 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.7856945991516113, + "learning_rate": 2.966994348299205e-05, + "loss": 0.403, + "step": 8305, + "teacher_loss": 0.36050018668174744 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.3099052309989929, + "learning_rate": 2.9669469480902698e-05, + "loss": 0.2279, + "step": 8306, + "teacher_loss": 0.21882228553295135 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.35392987728118896, + "learning_rate": 2.966899514248639e-05, + "loss": 0.2893, + "step": 8307, + "teacher_loss": 0.28210657835006714 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.30253785848617554, + "learning_rate": 2.966852046775401e-05, + "loss": 0.2339, + "step": 8308, + "teacher_loss": 0.22630849480628967 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.29873746633529663, + "learning_rate": 2.966804545671643e-05, + "loss": 0.2763, + "step": 8309, + "teacher_loss": 0.27378422021865845 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.7076412439346313, + "learning_rate": 2.9667570109384546e-05, + "loss": 0.2791, + "step": 8310, + "teacher_loss": 0.23151251673698425 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.8246315717697144, + "learning_rate": 2.9667094425769252e-05, + "loss": 0.2958, + "step": 8311, + "teacher_loss": 0.23702660202980042 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.8957650661468506, + "learning_rate": 2.966661840588146e-05, + "loss": 0.3596, + "step": 8312, + "teacher_loss": 0.29998862743377686 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.1701478660106659, + "learning_rate": 2.9666142049732078e-05, + "loss": 0.1866, + "step": 8313, + "teacher_loss": 0.18845608830451965 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.78484708070755, + "learning_rate": 2.966566535733203e-05, + "loss": 0.421, + "step": 8314, + "teacher_loss": 0.3805864155292511 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.19726255536079407, + "learning_rate": 2.9665188328692245e-05, + "loss": 0.2192, + "step": 8315, + "teacher_loss": 0.22168636322021484 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.4940336048603058, + "learning_rate": 2.9664710963823664e-05, + "loss": 0.2325, + "step": 8316, + "teacher_loss": 0.20341017842292786 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.44547832012176514, + "learning_rate": 2.9664233262737223e-05, + "loss": 0.2189, + "step": 8317, + "teacher_loss": 0.19375723600387573 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.2453044056892395, + "learning_rate": 2.9663755225443883e-05, + "loss": 0.1812, + "step": 8318, + "teacher_loss": 0.1740460991859436 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.2473374307155609, + "learning_rate": 2.96632768519546e-05, + "loss": 0.2587, + "step": 8319, + "teacher_loss": 0.2599189579486847 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.6977126002311707, + "learning_rate": 2.9662798142280337e-05, + "loss": 0.3048, + "step": 8320, + "teacher_loss": 0.2611473798751831 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.5293257832527161, + "learning_rate": 2.966231909643208e-05, + "loss": 0.2695, + "step": 8321, + "teacher_loss": 0.24065130949020386 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.1446615606546402, + "learning_rate": 2.9661839714420803e-05, + "loss": 0.1859, + "step": 8322, + "teacher_loss": 0.1905125081539154 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.5196651220321655, + "learning_rate": 2.9661359996257498e-05, + "loss": 0.3741, + "step": 8323, + "teacher_loss": 0.3579312264919281 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.3940209746360779, + "learning_rate": 2.966087994195317e-05, + "loss": 0.2549, + "step": 8324, + "teacher_loss": 0.2394266426563263 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.29434457421302795, + "learning_rate": 2.9660399551518814e-05, + "loss": 0.2261, + "step": 8325, + "teacher_loss": 0.21848604083061218 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.2795717716217041, + "learning_rate": 2.965991882496546e-05, + "loss": 0.2701, + "step": 8326, + "teacher_loss": 0.2690196633338928 + }, + { + "compression_loss": 0.0, + "epoch": 1.5, + "label_loss": 0.2920002341270447, + "learning_rate": 2.965943776230411e-05, + "loss": 0.2613, + "step": 8327, + "teacher_loss": 0.2578977346420288 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.5741807818412781, + "learning_rate": 2.9658956363545807e-05, + "loss": 0.3677, + "step": 8328, + "teacher_loss": 0.34473544359207153 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.5791088342666626, + "learning_rate": 2.9658474628701585e-05, + "loss": 0.3515, + "step": 8329, + "teacher_loss": 0.3262072801589966 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.19352833926677704, + "learning_rate": 2.965799255778249e-05, + "loss": 0.2056, + "step": 8330, + "teacher_loss": 0.20698420703411102 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.2579488456249237, + "learning_rate": 2.965751015079957e-05, + "loss": 0.1972, + "step": 8331, + "teacher_loss": 0.1904098093509674 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.624376118183136, + "learning_rate": 2.965702740776389e-05, + "loss": 0.2744, + "step": 8332, + "teacher_loss": 0.23547488451004028 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.5111129283905029, + "learning_rate": 2.965654432868651e-05, + "loss": 0.3857, + "step": 8333, + "teacher_loss": 0.3717600703239441 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.3470556139945984, + "learning_rate": 2.9656060913578518e-05, + "loss": 0.2462, + "step": 8334, + "teacher_loss": 0.23500967025756836 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.5388896465301514, + "learning_rate": 2.9655577162450987e-05, + "loss": 0.4477, + "step": 8335, + "teacher_loss": 0.4375278949737549 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.40195631980895996, + "learning_rate": 2.9655093075315013e-05, + "loss": 0.2458, + "step": 8336, + "teacher_loss": 0.22844737768173218 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.37421172857284546, + "learning_rate": 2.9654608652181692e-05, + "loss": 0.1919, + "step": 8337, + "teacher_loss": 0.1716611534357071 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.4291038513183594, + "learning_rate": 2.9654123893062128e-05, + "loss": 0.2376, + "step": 8338, + "teacher_loss": 0.21633939445018768 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.6496659517288208, + "learning_rate": 2.9653638797967443e-05, + "loss": 0.3195, + "step": 8339, + "teacher_loss": 0.2828552722930908 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.4038991332054138, + "learning_rate": 2.965315336690875e-05, + "loss": 0.3394, + "step": 8340, + "teacher_loss": 0.33224791288375854 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.5648466944694519, + "learning_rate": 2.9652667599897188e-05, + "loss": 0.2966, + "step": 8341, + "teacher_loss": 0.2668426036834717 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.5127744674682617, + "learning_rate": 2.9652181496943888e-05, + "loss": 0.2889, + "step": 8342, + "teacher_loss": 0.26402172446250916 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.768584132194519, + "learning_rate": 2.9651695058059994e-05, + "loss": 0.4148, + "step": 8343, + "teacher_loss": 0.37549591064453125 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.47389882802963257, + "learning_rate": 2.965120828325666e-05, + "loss": 0.2331, + "step": 8344, + "teacher_loss": 0.2063218057155609 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.2705867290496826, + "learning_rate": 2.9650721172545048e-05, + "loss": 0.2408, + "step": 8345, + "teacher_loss": 0.2375270128250122 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.5164929032325745, + "learning_rate": 2.9650233725936323e-05, + "loss": 0.3, + "step": 8346, + "teacher_loss": 0.27595674991607666 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.3479204773902893, + "learning_rate": 2.964974594344167e-05, + "loss": 0.3135, + "step": 8347, + "teacher_loss": 0.30964529514312744 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.44368255138397217, + "learning_rate": 2.9649257825072256e-05, + "loss": 0.2324, + "step": 8348, + "teacher_loss": 0.20887935161590576 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.3144627511501312, + "learning_rate": 2.9648769370839283e-05, + "loss": 0.3674, + "step": 8349, + "teacher_loss": 0.3732324242591858 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.6156946420669556, + "learning_rate": 2.9648280580753945e-05, + "loss": 0.2452, + "step": 8350, + "teacher_loss": 0.2040124386548996 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.17413514852523804, + "learning_rate": 2.9647791454827456e-05, + "loss": 0.2295, + "step": 8351, + "teacher_loss": 0.235652357339859 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.8269870281219482, + "learning_rate": 2.9647301993071022e-05, + "loss": 0.49, + "step": 8352, + "teacher_loss": 0.4525560736656189 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.6094679236412048, + "learning_rate": 2.964681219549587e-05, + "loss": 0.2795, + "step": 8353, + "teacher_loss": 0.24284467101097107 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.28075844049453735, + "learning_rate": 2.9646322062113225e-05, + "loss": 0.2488, + "step": 8354, + "teacher_loss": 0.24528086185455322 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.5051464438438416, + "learning_rate": 2.964583159293433e-05, + "loss": 0.3247, + "step": 8355, + "teacher_loss": 0.3046274185180664 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.6898150444030762, + "learning_rate": 2.9645340787970426e-05, + "loss": 0.3297, + "step": 8356, + "teacher_loss": 0.2896538972854614 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.5490769147872925, + "learning_rate": 2.964484964723277e-05, + "loss": 0.3293, + "step": 8357, + "teacher_loss": 0.30491000413894653 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.3549392819404602, + "learning_rate": 2.964435817073261e-05, + "loss": 0.2043, + "step": 8358, + "teacher_loss": 0.18755456805229187 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.20744769275188446, + "learning_rate": 2.9643866358481236e-05, + "loss": 0.2266, + "step": 8359, + "teacher_loss": 0.22875502705574036 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.47783827781677246, + "learning_rate": 2.9643374210489904e-05, + "loss": 0.2722, + "step": 8360, + "teacher_loss": 0.24934379756450653 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.6166070699691772, + "learning_rate": 2.9642881726769903e-05, + "loss": 0.2426, + "step": 8361, + "teacher_loss": 0.20106589794158936 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.2669008672237396, + "learning_rate": 2.9642388907332525e-05, + "loss": 0.1978, + "step": 8362, + "teacher_loss": 0.19013383984565735 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.3201882243156433, + "learning_rate": 2.9641895752189074e-05, + "loss": 0.1609, + "step": 8363, + "teacher_loss": 0.14325255155563354 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.5220135450363159, + "learning_rate": 2.964140226135085e-05, + "loss": 0.2941, + "step": 8364, + "teacher_loss": 0.26881182193756104 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.5362215042114258, + "learning_rate": 2.964090843482917e-05, + "loss": 0.3421, + "step": 8365, + "teacher_loss": 0.32055777311325073 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.47233107686042786, + "learning_rate": 2.9640414272635352e-05, + "loss": 0.2496, + "step": 8366, + "teacher_loss": 0.22484159469604492 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.33339595794677734, + "learning_rate": 2.963991977478073e-05, + "loss": 0.2078, + "step": 8367, + "teacher_loss": 0.19383983314037323 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.40793848037719727, + "learning_rate": 2.9639424941276647e-05, + "loss": 0.3011, + "step": 8368, + "teacher_loss": 0.2892279624938965 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.8922539949417114, + "learning_rate": 2.9638929772134433e-05, + "loss": 0.2839, + "step": 8369, + "teacher_loss": 0.21629515290260315 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.40253862738609314, + "learning_rate": 2.9638434267365456e-05, + "loss": 0.2682, + "step": 8370, + "teacher_loss": 0.2532370090484619 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.43802812695503235, + "learning_rate": 2.9637938426981062e-05, + "loss": 0.2464, + "step": 8371, + "teacher_loss": 0.22511237859725952 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.5935195684432983, + "learning_rate": 2.963744225099263e-05, + "loss": 0.3076, + "step": 8372, + "teacher_loss": 0.27581462264060974 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.3677457273006439, + "learning_rate": 2.9636945739411533e-05, + "loss": 0.2266, + "step": 8373, + "teacher_loss": 0.21091234683990479 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.8131536841392517, + "learning_rate": 2.9636448892249156e-05, + "loss": 0.4475, + "step": 8374, + "teacher_loss": 0.40686312317848206 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.18671363592147827, + "learning_rate": 2.963595170951689e-05, + "loss": 0.178, + "step": 8375, + "teacher_loss": 0.17702318727970123 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.24322748184204102, + "learning_rate": 2.9635454191226123e-05, + "loss": 0.3136, + "step": 8376, + "teacher_loss": 0.321449339389801 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.21783912181854248, + "learning_rate": 2.963495633738828e-05, + "loss": 0.2514, + "step": 8377, + "teacher_loss": 0.25513410568237305 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.6475422978401184, + "learning_rate": 2.963445814801476e-05, + "loss": 0.319, + "step": 8378, + "teacher_loss": 0.2825337052345276 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.5407238006591797, + "learning_rate": 2.9633959623116995e-05, + "loss": 0.3031, + "step": 8379, + "teacher_loss": 0.276676744222641 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.4231138527393341, + "learning_rate": 2.963346076270641e-05, + "loss": 0.2607, + "step": 8380, + "teacher_loss": 0.24264803528785706 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.4908568263053894, + "learning_rate": 2.9632961566794444e-05, + "loss": 0.2757, + "step": 8381, + "teacher_loss": 0.2517714500427246 + }, + { + "compression_loss": 0.0, + "epoch": 1.51, + "label_loss": 0.8232316970825195, + "learning_rate": 2.9632462035392537e-05, + "loss": 0.4056, + "step": 8382, + "teacher_loss": 0.3591885268688202 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.19213512539863586, + "learning_rate": 2.9631962168512146e-05, + "loss": 0.2697, + "step": 8383, + "teacher_loss": 0.2783271372318268 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.2417571246623993, + "learning_rate": 2.9631461966164735e-05, + "loss": 0.2084, + "step": 8384, + "teacher_loss": 0.2046608179807663 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.27906641364097595, + "learning_rate": 2.9630961428361768e-05, + "loss": 0.264, + "step": 8385, + "teacher_loss": 0.2623758018016815 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.3609544038772583, + "learning_rate": 2.963046055511472e-05, + "loss": 0.2347, + "step": 8386, + "teacher_loss": 0.22070875763893127 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.5492177605628967, + "learning_rate": 2.9629959346435077e-05, + "loss": 0.2443, + "step": 8387, + "teacher_loss": 0.21047580242156982 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.33009809255599976, + "learning_rate": 2.962945780233433e-05, + "loss": 0.2253, + "step": 8388, + "teacher_loss": 0.2136225700378418 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.4573343098163605, + "learning_rate": 2.9628955922823972e-05, + "loss": 0.2763, + "step": 8389, + "teacher_loss": 0.25623732805252075 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.5541173219680786, + "learning_rate": 2.9628453707915518e-05, + "loss": 0.3137, + "step": 8390, + "teacher_loss": 0.28696388006210327 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.6047319173812866, + "learning_rate": 2.962795115762048e-05, + "loss": 0.2803, + "step": 8391, + "teacher_loss": 0.24419817328453064 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.32866793870925903, + "learning_rate": 2.962744827195038e-05, + "loss": 0.2106, + "step": 8392, + "teacher_loss": 0.19745652377605438 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.5538060069084167, + "learning_rate": 2.9626945050916745e-05, + "loss": 0.2181, + "step": 8393, + "teacher_loss": 0.18079423904418945 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.3148215711116791, + "learning_rate": 2.962644149453111e-05, + "loss": 0.1793, + "step": 8394, + "teacher_loss": 0.1642608344554901 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.31869810819625854, + "learning_rate": 2.962593760280503e-05, + "loss": 0.1841, + "step": 8395, + "teacher_loss": 0.16911497712135315 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.5265036821365356, + "learning_rate": 2.962543337575005e-05, + "loss": 0.38, + "step": 8396, + "teacher_loss": 0.3637319505214691 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.3908854126930237, + "learning_rate": 2.9624928813377724e-05, + "loss": 0.2611, + "step": 8397, + "teacher_loss": 0.24672654271125793 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.28283023834228516, + "learning_rate": 2.962442391569964e-05, + "loss": 0.2506, + "step": 8398, + "teacher_loss": 0.24703805148601532 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.6269611120223999, + "learning_rate": 2.9623918682727355e-05, + "loss": 0.3314, + "step": 8399, + "teacher_loss": 0.2985045313835144 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.25144824385643005, + "learning_rate": 2.9623413114472458e-05, + "loss": 0.241, + "step": 8400, + "teacher_loss": 0.2398124784231186 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.19570410251617432, + "learning_rate": 2.962290721094655e-05, + "loss": 0.2497, + "step": 8401, + "teacher_loss": 0.25573664903640747 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.7429532408714294, + "learning_rate": 2.9622400972161214e-05, + "loss": 0.3798, + "step": 8402, + "teacher_loss": 0.3395037055015564 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.6496878862380981, + "learning_rate": 2.9621894398128066e-05, + "loss": 0.2914, + "step": 8403, + "teacher_loss": 0.2515791356563568 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.5783128142356873, + "learning_rate": 2.9621387488858715e-05, + "loss": 0.3202, + "step": 8404, + "teacher_loss": 0.29154735803604126 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.4161297678947449, + "learning_rate": 2.9620880244364792e-05, + "loss": 0.3433, + "step": 8405, + "teacher_loss": 0.3352524936199188 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.4336242079734802, + "learning_rate": 2.9620372664657916e-05, + "loss": 0.2697, + "step": 8406, + "teacher_loss": 0.25147441029548645 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.22214964032173157, + "learning_rate": 2.961986474974973e-05, + "loss": 0.1863, + "step": 8407, + "teacher_loss": 0.18227848410606384 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.12293720245361328, + "learning_rate": 2.961935649965188e-05, + "loss": 0.1328, + "step": 8408, + "teacher_loss": 0.13388575613498688 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.9924032092094421, + "learning_rate": 2.9618847914376014e-05, + "loss": 0.3607, + "step": 8409, + "teacher_loss": 0.29051482677459717 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.3416539430618286, + "learning_rate": 2.96183389939338e-05, + "loss": 0.2569, + "step": 8410, + "teacher_loss": 0.24749934673309326 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.4048881530761719, + "learning_rate": 2.9617829738336894e-05, + "loss": 0.3805, + "step": 8411, + "teacher_loss": 0.3777827024459839 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.4789392650127411, + "learning_rate": 2.9617320147596982e-05, + "loss": 0.2999, + "step": 8412, + "teacher_loss": 0.2800263464450836 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.4117765426635742, + "learning_rate": 2.9616810221725743e-05, + "loss": 0.2911, + "step": 8413, + "teacher_loss": 0.2777364253997803 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.3812519907951355, + "learning_rate": 2.9616299960734873e-05, + "loss": 0.2731, + "step": 8414, + "teacher_loss": 0.26104792952537537 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.7033828496932983, + "learning_rate": 2.961578936463606e-05, + "loss": 0.3322, + "step": 8415, + "teacher_loss": 0.29092758893966675 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.28939089179039, + "learning_rate": 2.9615278433441024e-05, + "loss": 0.2216, + "step": 8416, + "teacher_loss": 0.214043527841568 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.7313584089279175, + "learning_rate": 2.9614767167161467e-05, + "loss": 0.2734, + "step": 8417, + "teacher_loss": 0.22255347669124603 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.5938645601272583, + "learning_rate": 2.9614255565809117e-05, + "loss": 0.342, + "step": 8418, + "teacher_loss": 0.31401240825653076 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.4795907735824585, + "learning_rate": 2.9613743629395707e-05, + "loss": 0.3109, + "step": 8419, + "teacher_loss": 0.2921314835548401 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.9788291454315186, + "learning_rate": 2.9613231357932967e-05, + "loss": 0.3996, + "step": 8420, + "teacher_loss": 0.3352123498916626 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.6550199389457703, + "learning_rate": 2.9612718751432646e-05, + "loss": 0.355, + "step": 8421, + "teacher_loss": 0.32168450951576233 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.35970908403396606, + "learning_rate": 2.9612205809906495e-05, + "loss": 0.3148, + "step": 8422, + "teacher_loss": 0.3098025918006897 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.48537376523017883, + "learning_rate": 2.961169253336627e-05, + "loss": 0.3056, + "step": 8423, + "teacher_loss": 0.2856075167655945 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.6003162860870361, + "learning_rate": 2.9611178921823746e-05, + "loss": 0.2748, + "step": 8424, + "teacher_loss": 0.23865661025047302 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.6882692575454712, + "learning_rate": 2.96106649752907e-05, + "loss": 0.3216, + "step": 8425, + "teacher_loss": 0.2808833122253418 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.39154383540153503, + "learning_rate": 2.9610150693778907e-05, + "loss": 0.2728, + "step": 8426, + "teacher_loss": 0.25956955552101135 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.4956713914871216, + "learning_rate": 2.9609636077300163e-05, + "loss": 0.3618, + "step": 8427, + "teacher_loss": 0.3469092845916748 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.6076470613479614, + "learning_rate": 2.960912112586627e-05, + "loss": 0.2896, + "step": 8428, + "teacher_loss": 0.2542904019355774 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.6040419340133667, + "learning_rate": 2.960860583948902e-05, + "loss": 0.4315, + "step": 8429, + "teacher_loss": 0.4123265743255615 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.28677159547805786, + "learning_rate": 2.9608090218180245e-05, + "loss": 0.1555, + "step": 8430, + "teacher_loss": 0.14092107117176056 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.6993134021759033, + "learning_rate": 2.9607574261951756e-05, + "loss": 0.2685, + "step": 8431, + "teacher_loss": 0.22059330344200134 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.5418664216995239, + "learning_rate": 2.9607057970815387e-05, + "loss": 0.2752, + "step": 8432, + "teacher_loss": 0.24559202790260315 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.2492949366569519, + "learning_rate": 2.960654134478297e-05, + "loss": 0.2395, + "step": 8433, + "teacher_loss": 0.23839811980724335 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.34120193123817444, + "learning_rate": 2.960602438386635e-05, + "loss": 0.3824, + "step": 8434, + "teacher_loss": 0.38692623376846313 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.579842746257782, + "learning_rate": 2.9605507088077385e-05, + "loss": 0.3205, + "step": 8435, + "teacher_loss": 0.2916521430015564 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.4096500873565674, + "learning_rate": 2.9604989457427934e-05, + "loss": 0.366, + "step": 8436, + "teacher_loss": 0.3611446022987366 + }, + { + "compression_loss": 0.0, + "epoch": 1.52, + "label_loss": 0.41288426518440247, + "learning_rate": 2.9604471491929863e-05, + "loss": 0.2535, + "step": 8437, + "teacher_loss": 0.23582306504249573 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.3749391436576843, + "learning_rate": 2.9603953191595046e-05, + "loss": 0.2738, + "step": 8438, + "teacher_loss": 0.26256895065307617 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.2884719967842102, + "learning_rate": 2.9603434556435365e-05, + "loss": 0.3065, + "step": 8439, + "teacher_loss": 0.30855679512023926 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.45339787006378174, + "learning_rate": 2.9602915586462718e-05, + "loss": 0.2386, + "step": 8440, + "teacher_loss": 0.21468129754066467 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.9239440560340881, + "learning_rate": 2.9602396281688994e-05, + "loss": 0.2757, + "step": 8441, + "teacher_loss": 0.203645259141922 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.4192727208137512, + "learning_rate": 2.9601876642126105e-05, + "loss": 0.2139, + "step": 8442, + "teacher_loss": 0.19105207920074463 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.8145973682403564, + "learning_rate": 2.9601356667785963e-05, + "loss": 0.4099, + "step": 8443, + "teacher_loss": 0.3649258613586426 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.30364200472831726, + "learning_rate": 2.960083635868049e-05, + "loss": 0.2535, + "step": 8444, + "teacher_loss": 0.2479751706123352 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.7078992128372192, + "learning_rate": 2.9600315714821617e-05, + "loss": 0.2779, + "step": 8445, + "teacher_loss": 0.23008784651756287 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.4667588472366333, + "learning_rate": 2.9599794736221276e-05, + "loss": 0.2413, + "step": 8446, + "teacher_loss": 0.21621307730674744 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.41662049293518066, + "learning_rate": 2.9599273422891413e-05, + "loss": 0.2877, + "step": 8447, + "teacher_loss": 0.27338707447052 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.4033508896827698, + "learning_rate": 2.9598751774843986e-05, + "loss": 0.2376, + "step": 8448, + "teacher_loss": 0.21923065185546875 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.8118084669113159, + "learning_rate": 2.9598229792090948e-05, + "loss": 0.3252, + "step": 8449, + "teacher_loss": 0.27113327383995056 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.6977510452270508, + "learning_rate": 2.9597707474644265e-05, + "loss": 0.2627, + "step": 8450, + "teacher_loss": 0.2143951952457428 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.11370211839675903, + "learning_rate": 2.9597184822515915e-05, + "loss": 0.1815, + "step": 8451, + "teacher_loss": 0.18902304768562317 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.29347214102745056, + "learning_rate": 2.959666183571789e-05, + "loss": 0.2508, + "step": 8452, + "teacher_loss": 0.2460668832063675 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.7737855911254883, + "learning_rate": 2.9596138514262166e-05, + "loss": 0.3101, + "step": 8453, + "teacher_loss": 0.2585286796092987 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.31305283308029175, + "learning_rate": 2.959561485816075e-05, + "loss": 0.2311, + "step": 8454, + "teacher_loss": 0.2220320701599121 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.2743084132671356, + "learning_rate": 2.959509086742564e-05, + "loss": 0.2088, + "step": 8455, + "teacher_loss": 0.20153355598449707 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.7727459669113159, + "learning_rate": 2.9594566542068865e-05, + "loss": 0.3668, + "step": 8456, + "teacher_loss": 0.3217070400714874 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.44734567403793335, + "learning_rate": 2.9594041882102426e-05, + "loss": 0.3409, + "step": 8457, + "teacher_loss": 0.3290509283542633 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.49452483654022217, + "learning_rate": 2.9593516887538365e-05, + "loss": 0.2615, + "step": 8458, + "teacher_loss": 0.23566311597824097 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.5902209877967834, + "learning_rate": 2.9592991558388715e-05, + "loss": 0.31, + "step": 8459, + "teacher_loss": 0.2788805365562439 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.5106395483016968, + "learning_rate": 2.9592465894665526e-05, + "loss": 0.4375, + "step": 8460, + "teacher_loss": 0.4294174611568451 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.2021930068731308, + "learning_rate": 2.9591939896380836e-05, + "loss": 0.2524, + "step": 8461, + "teacher_loss": 0.25796014070510864 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.3023834228515625, + "learning_rate": 2.9591413563546722e-05, + "loss": 0.2313, + "step": 8462, + "teacher_loss": 0.2233705222606659 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 1.266172170639038, + "learning_rate": 2.9590886896175234e-05, + "loss": 0.3023, + "step": 8463, + "teacher_loss": 0.19525475800037384 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.3610774874687195, + "learning_rate": 2.9590359894278458e-05, + "loss": 0.2568, + "step": 8464, + "teacher_loss": 0.24524801969528198 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.8323571681976318, + "learning_rate": 2.9589832557868476e-05, + "loss": 0.3361, + "step": 8465, + "teacher_loss": 0.28099486231803894 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.30585744976997375, + "learning_rate": 2.9589304886957373e-05, + "loss": 0.2587, + "step": 8466, + "teacher_loss": 0.25347477197647095 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.39869269728660583, + "learning_rate": 2.9588776881557253e-05, + "loss": 0.2135, + "step": 8467, + "teacher_loss": 0.1929522156715393 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.29899516701698303, + "learning_rate": 2.9588248541680216e-05, + "loss": 0.2565, + "step": 8468, + "teacher_loss": 0.2518104016780853 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.6047368049621582, + "learning_rate": 2.958771986733838e-05, + "loss": 0.2896, + "step": 8469, + "teacher_loss": 0.25462162494659424 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 1.1784284114837646, + "learning_rate": 2.9587190858543864e-05, + "loss": 0.3237, + "step": 8470, + "teacher_loss": 0.22872406244277954 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.7156506776809692, + "learning_rate": 2.9586661515308793e-05, + "loss": 0.3385, + "step": 8471, + "teacher_loss": 0.2965621054172516 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.11129346489906311, + "learning_rate": 2.9586131837645308e-05, + "loss": 0.1733, + "step": 8472, + "teacher_loss": 0.18018320202827454 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.39405733346939087, + "learning_rate": 2.9585601825565554e-05, + "loss": 0.3225, + "step": 8473, + "teacher_loss": 0.3145187497138977 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.47947466373443604, + "learning_rate": 2.9585071479081675e-05, + "loss": 0.2253, + "step": 8474, + "teacher_loss": 0.19707363843917847 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.2032071352005005, + "learning_rate": 2.958454079820584e-05, + "loss": 0.146, + "step": 8475, + "teacher_loss": 0.13969644904136658 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.30624833703041077, + "learning_rate": 2.958400978295021e-05, + "loss": 0.2666, + "step": 8476, + "teacher_loss": 0.2622242271900177 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 1.054046869277954, + "learning_rate": 2.958347843332696e-05, + "loss": 0.3451, + "step": 8477, + "teacher_loss": 0.2663172483444214 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.5247926115989685, + "learning_rate": 2.9582946749348273e-05, + "loss": 0.3117, + "step": 8478, + "teacher_loss": 0.2880529761314392 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.6808921098709106, + "learning_rate": 2.958241473102634e-05, + "loss": 0.3854, + "step": 8479, + "teacher_loss": 0.3525335490703583 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.2587246596813202, + "learning_rate": 2.9581882378373354e-05, + "loss": 0.2725, + "step": 8480, + "teacher_loss": 0.27407407760620117 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.43125879764556885, + "learning_rate": 2.9581349691401527e-05, + "loss": 0.2876, + "step": 8481, + "teacher_loss": 0.27168384194374084 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.6107392907142639, + "learning_rate": 2.958081667012307e-05, + "loss": 0.3078, + "step": 8482, + "teacher_loss": 0.27408719062805176 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.4485079050064087, + "learning_rate": 2.95802833145502e-05, + "loss": 0.2621, + "step": 8483, + "teacher_loss": 0.24143168330192566 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.4252525269985199, + "learning_rate": 2.957974962469515e-05, + "loss": 0.2547, + "step": 8484, + "teacher_loss": 0.2357458472251892 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 1.4223284721374512, + "learning_rate": 2.9579215600570152e-05, + "loss": 0.3845, + "step": 8485, + "teacher_loss": 0.2691500186920166 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.5256362557411194, + "learning_rate": 2.9578681242187448e-05, + "loss": 0.2397, + "step": 8486, + "teacher_loss": 0.20793354511260986 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.34112221002578735, + "learning_rate": 2.9578146549559296e-05, + "loss": 0.1687, + "step": 8487, + "teacher_loss": 0.14957356452941895 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.20230218768119812, + "learning_rate": 2.9577611522697946e-05, + "loss": 0.2023, + "step": 8488, + "teacher_loss": 0.20225536823272705 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.28243154287338257, + "learning_rate": 2.9577076161615675e-05, + "loss": 0.2195, + "step": 8489, + "teacher_loss": 0.21251662075519562 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.3175084888935089, + "learning_rate": 2.9576540466324753e-05, + "loss": 0.2757, + "step": 8490, + "teacher_loss": 0.27106496691703796 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.7194525599479675, + "learning_rate": 2.957600443683746e-05, + "loss": 0.2751, + "step": 8491, + "teacher_loss": 0.22571350634098053 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.3478257656097412, + "learning_rate": 2.957546807316608e-05, + "loss": 0.265, + "step": 8492, + "teacher_loss": 0.2557602524757385 + }, + { + "compression_loss": 0.0, + "epoch": 1.53, + "label_loss": 0.37659671902656555, + "learning_rate": 2.9574931375322927e-05, + "loss": 0.293, + "step": 8493, + "teacher_loss": 0.28370219469070435 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.39596056938171387, + "learning_rate": 2.9574394343320285e-05, + "loss": 0.3361, + "step": 8494, + "teacher_loss": 0.32943981885910034 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.2551983594894409, + "learning_rate": 2.9573856977170484e-05, + "loss": 0.281, + "step": 8495, + "teacher_loss": 0.28387588262557983 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.22610679268836975, + "learning_rate": 2.9573319276885837e-05, + "loss": 0.1834, + "step": 8496, + "teacher_loss": 0.1787102222442627 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.5078145265579224, + "learning_rate": 2.957278124247867e-05, + "loss": 0.35, + "step": 8497, + "teacher_loss": 0.3324176073074341 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.9952711462974548, + "learning_rate": 2.957224287396132e-05, + "loss": 0.4073, + "step": 8498, + "teacher_loss": 0.34200337529182434 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.6877197027206421, + "learning_rate": 2.957170417134613e-05, + "loss": 0.3768, + "step": 8499, + "teacher_loss": 0.34229886531829834 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.37241944670677185, + "learning_rate": 2.9571165134645455e-05, + "loss": 0.2671, + "step": 8500, + "teacher_loss": 0.2553884983062744 + }, + { + "epoch": 1.54, + "eval_exact_match": 79.6972563859981, + "eval_f1": 87.18427986193834, + "step": 8500 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.7474992275238037, + "learning_rate": 2.957062576387165e-05, + "loss": 0.4629, + "step": 8501, + "teacher_loss": 0.4313093423843384 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.44652196764945984, + "learning_rate": 2.9570086059037077e-05, + "loss": 0.216, + "step": 8502, + "teacher_loss": 0.19041018187999725 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.25524282455444336, + "learning_rate": 2.956954602015412e-05, + "loss": 0.2797, + "step": 8503, + "teacher_loss": 0.28237634897232056 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.582446277141571, + "learning_rate": 2.956900564723515e-05, + "loss": 0.2829, + "step": 8504, + "teacher_loss": 0.24963778257369995 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.2064216136932373, + "learning_rate": 2.956846494029256e-05, + "loss": 0.1592, + "step": 8505, + "teacher_loss": 0.15399357676506042 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.3322484791278839, + "learning_rate": 2.9567923899338748e-05, + "loss": 0.2651, + "step": 8506, + "teacher_loss": 0.2576685845851898 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.6278872489929199, + "learning_rate": 2.9567382524386122e-05, + "loss": 0.2171, + "step": 8507, + "teacher_loss": 0.1714574247598648 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.17011165618896484, + "learning_rate": 2.9566840815447088e-05, + "loss": 0.1861, + "step": 8508, + "teacher_loss": 0.1878291368484497 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.5332197546958923, + "learning_rate": 2.9566298772534065e-05, + "loss": 0.2753, + "step": 8509, + "teacher_loss": 0.24668832123279572 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.28810805082321167, + "learning_rate": 2.9565756395659485e-05, + "loss": 0.3266, + "step": 8510, + "teacher_loss": 0.3309100866317749 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.3236047029495239, + "learning_rate": 2.9565213684835782e-05, + "loss": 0.2578, + "step": 8511, + "teacher_loss": 0.25046277046203613 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.6556010842323303, + "learning_rate": 2.9564670640075395e-05, + "loss": 0.299, + "step": 8512, + "teacher_loss": 0.25935590267181396 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.5609636902809143, + "learning_rate": 2.956412726139078e-05, + "loss": 0.4056, + "step": 8513, + "teacher_loss": 0.3883693218231201 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.6695277690887451, + "learning_rate": 2.9563583548794394e-05, + "loss": 0.2479, + "step": 8514, + "teacher_loss": 0.2010360062122345 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.9909501075744629, + "learning_rate": 2.9563039502298698e-05, + "loss": 0.3945, + "step": 8515, + "teacher_loss": 0.32819777727127075 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.543024480342865, + "learning_rate": 2.956249512191617e-05, + "loss": 0.2847, + "step": 8516, + "teacher_loss": 0.2560148239135742 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.5923643112182617, + "learning_rate": 2.9561950407659287e-05, + "loss": 0.2285, + "step": 8517, + "teacher_loss": 0.1880713403224945 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.3293316960334778, + "learning_rate": 2.9561405359540544e-05, + "loss": 0.3, + "step": 8518, + "teacher_loss": 0.29673877358436584 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.42155328392982483, + "learning_rate": 2.956085997757243e-05, + "loss": 0.2727, + "step": 8519, + "teacher_loss": 0.2562018036842346 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.22038277983665466, + "learning_rate": 2.956031426176746e-05, + "loss": 0.2658, + "step": 8520, + "teacher_loss": 0.27079641819000244 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 1.7243285179138184, + "learning_rate": 2.9559768212138128e-05, + "loss": 0.5099, + "step": 8521, + "teacher_loss": 0.3749206066131592 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.5286412239074707, + "learning_rate": 2.9559221828696968e-05, + "loss": 0.2426, + "step": 8522, + "teacher_loss": 0.2108011543750763 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.4789571762084961, + "learning_rate": 2.9558675111456502e-05, + "loss": 0.2901, + "step": 8523, + "teacher_loss": 0.26915454864501953 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.3270968496799469, + "learning_rate": 2.9558128060429262e-05, + "loss": 0.1653, + "step": 8524, + "teacher_loss": 0.14727026224136353 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.3637726902961731, + "learning_rate": 2.95575806756278e-05, + "loss": 0.2587, + "step": 8525, + "teacher_loss": 0.246974378824234 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.6164846420288086, + "learning_rate": 2.9557032957064654e-05, + "loss": 0.3489, + "step": 8526, + "teacher_loss": 0.31919431686401367 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.3185744881629944, + "learning_rate": 2.9556484904752388e-05, + "loss": 0.21, + "step": 8527, + "teacher_loss": 0.19795897603034973 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.35134902596473694, + "learning_rate": 2.9555936518703564e-05, + "loss": 0.2579, + "step": 8528, + "teacher_loss": 0.2475559115409851 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 1.3769758939743042, + "learning_rate": 2.955538779893076e-05, + "loss": 0.5702, + "step": 8529, + "teacher_loss": 0.4805184304714203 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.5651233792304993, + "learning_rate": 2.9554838745446545e-05, + "loss": 0.3449, + "step": 8530, + "teacher_loss": 0.32047462463378906 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 1.7869309186935425, + "learning_rate": 2.9554289358263528e-05, + "loss": 0.3986, + "step": 8531, + "teacher_loss": 0.2443714290857315 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.8655029535293579, + "learning_rate": 2.9553739637394283e-05, + "loss": 0.3367, + "step": 8532, + "teacher_loss": 0.2779577374458313 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.3091878890991211, + "learning_rate": 2.955318958285142e-05, + "loss": 0.2513, + "step": 8533, + "teacher_loss": 0.24482083320617676 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.3840416967868805, + "learning_rate": 2.955263919464756e-05, + "loss": 0.2422, + "step": 8534, + "teacher_loss": 0.22649219632148743 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.5083248615264893, + "learning_rate": 2.9552088472795314e-05, + "loss": 0.2554, + "step": 8535, + "teacher_loss": 0.2272791862487793 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.45723170042037964, + "learning_rate": 2.9551537417307307e-05, + "loss": 0.3238, + "step": 8536, + "teacher_loss": 0.30900412797927856 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.4478493332862854, + "learning_rate": 2.955098602819617e-05, + "loss": 0.2271, + "step": 8537, + "teacher_loss": 0.20261050760746002 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.30162131786346436, + "learning_rate": 2.9550434305474556e-05, + "loss": 0.2749, + "step": 8538, + "teacher_loss": 0.27192869782447815 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.7538201212882996, + "learning_rate": 2.95498822491551e-05, + "loss": 0.3638, + "step": 8539, + "teacher_loss": 0.3205137252807617 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.4037252962589264, + "learning_rate": 2.9549329859250477e-05, + "loss": 0.3005, + "step": 8540, + "teacher_loss": 0.28901785612106323 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.2924758791923523, + "learning_rate": 2.9548777135773338e-05, + "loss": 0.2976, + "step": 8541, + "teacher_loss": 0.29822444915771484 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.3618459105491638, + "learning_rate": 2.9548224078736356e-05, + "loss": 0.2886, + "step": 8542, + "teacher_loss": 0.28047671914100647 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.3465486466884613, + "learning_rate": 2.9547670688152212e-05, + "loss": 0.2236, + "step": 8543, + "teacher_loss": 0.20996011793613434 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.49834638833999634, + "learning_rate": 2.9547116964033602e-05, + "loss": 0.3097, + "step": 8544, + "teacher_loss": 0.2887871265411377 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.496936559677124, + "learning_rate": 2.9546562906393208e-05, + "loss": 0.3024, + "step": 8545, + "teacher_loss": 0.28076890110969543 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.5599715709686279, + "learning_rate": 2.9546008515243745e-05, + "loss": 0.2679, + "step": 8546, + "teacher_loss": 0.23540370166301727 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.8040772676467896, + "learning_rate": 2.9545453790597915e-05, + "loss": 0.5235, + "step": 8547, + "teacher_loss": 0.4922882318496704 + }, + { + "compression_loss": 0.0, + "epoch": 1.54, + "label_loss": 0.5149890780448914, + "learning_rate": 2.9544898732468438e-05, + "loss": 0.2335, + "step": 8548, + "teacher_loss": 0.2022087424993515 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.6115249395370483, + "learning_rate": 2.9544343340868042e-05, + "loss": 0.3089, + "step": 8549, + "teacher_loss": 0.2752223312854767 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.7015501260757446, + "learning_rate": 2.954378761580946e-05, + "loss": 0.2938, + "step": 8550, + "teacher_loss": 0.24852949380874634 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.5815811157226562, + "learning_rate": 2.9543231557305436e-05, + "loss": 0.2919, + "step": 8551, + "teacher_loss": 0.259745717048645 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.22638264298439026, + "learning_rate": 2.9542675165368708e-05, + "loss": 0.1974, + "step": 8552, + "teacher_loss": 0.19422048330307007 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.2468913048505783, + "learning_rate": 2.9542118440012043e-05, + "loss": 0.1948, + "step": 8553, + "teacher_loss": 0.1889914572238922 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.7018353343009949, + "learning_rate": 2.9541561381248203e-05, + "loss": 0.2781, + "step": 8554, + "teacher_loss": 0.23104478418827057 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.6007068753242493, + "learning_rate": 2.9541003989089956e-05, + "loss": 0.2717, + "step": 8555, + "teacher_loss": 0.23517441749572754 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.5439329147338867, + "learning_rate": 2.9540446263550085e-05, + "loss": 0.2358, + "step": 8556, + "teacher_loss": 0.20154231786727905 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.8904017806053162, + "learning_rate": 2.9539888204641377e-05, + "loss": 0.3434, + "step": 8557, + "teacher_loss": 0.2826574146747589 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.3239039480686188, + "learning_rate": 2.9539329812376624e-05, + "loss": 0.2546, + "step": 8558, + "teacher_loss": 0.2469239979982376 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.3068544864654541, + "learning_rate": 2.953877108676863e-05, + "loss": 0.2504, + "step": 8559, + "teacher_loss": 0.24412289261817932 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.2580702304840088, + "learning_rate": 2.9538212027830203e-05, + "loss": 0.3587, + "step": 8560, + "teacher_loss": 0.36991024017333984 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.44764161109924316, + "learning_rate": 2.9537652635574162e-05, + "loss": 0.1909, + "step": 8561, + "teacher_loss": 0.16238421201705933 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.4043823778629303, + "learning_rate": 2.9537092910013334e-05, + "loss": 0.3002, + "step": 8562, + "teacher_loss": 0.2885870337486267 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.5871374607086182, + "learning_rate": 2.953653285116055e-05, + "loss": 0.3009, + "step": 8563, + "teacher_loss": 0.2691356837749481 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.26736289262771606, + "learning_rate": 2.9535972459028648e-05, + "loss": 0.27, + "step": 8564, + "teacher_loss": 0.2703286111354828 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.1722472906112671, + "learning_rate": 2.953541173363048e-05, + "loss": 0.2205, + "step": 8565, + "teacher_loss": 0.22588962316513062 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.4597053825855255, + "learning_rate": 2.9534850674978903e-05, + "loss": 0.2865, + "step": 8566, + "teacher_loss": 0.26726406812667847 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.2582359313964844, + "learning_rate": 2.9534289283086776e-05, + "loss": 0.2335, + "step": 8567, + "teacher_loss": 0.23078656196594238 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.24019068479537964, + "learning_rate": 2.953372755796697e-05, + "loss": 0.1791, + "step": 8568, + "teacher_loss": 0.17230644822120667 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.25019025802612305, + "learning_rate": 2.9533165499632367e-05, + "loss": 0.203, + "step": 8569, + "teacher_loss": 0.1977461278438568 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.5243014097213745, + "learning_rate": 2.9532603108095855e-05, + "loss": 0.25, + "step": 8570, + "teacher_loss": 0.21947431564331055 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.6994187235832214, + "learning_rate": 2.953204038337032e-05, + "loss": 0.4322, + "step": 8571, + "teacher_loss": 0.4024886190891266 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.458823025226593, + "learning_rate": 2.953147732546867e-05, + "loss": 0.4275, + "step": 8572, + "teacher_loss": 0.42399123311042786 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.5695011615753174, + "learning_rate": 2.953091393440381e-05, + "loss": 0.3098, + "step": 8573, + "teacher_loss": 0.28099048137664795 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.3701797127723694, + "learning_rate": 2.9530350210188662e-05, + "loss": 0.1993, + "step": 8574, + "teacher_loss": 0.1803111433982849 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.5282086730003357, + "learning_rate": 2.952978615283615e-05, + "loss": 0.243, + "step": 8575, + "teacher_loss": 0.21136143803596497 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.27152663469314575, + "learning_rate": 2.9529221762359203e-05, + "loss": 0.1898, + "step": 8576, + "teacher_loss": 0.18074044585227966 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.4015180468559265, + "learning_rate": 2.9528657038770758e-05, + "loss": 0.1714, + "step": 8577, + "teacher_loss": 0.1458512842655182 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 1.4023849964141846, + "learning_rate": 2.952809198208377e-05, + "loss": 0.4972, + "step": 8578, + "teacher_loss": 0.3966098725795746 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.15761852264404297, + "learning_rate": 2.952752659231119e-05, + "loss": 0.1654, + "step": 8579, + "teacher_loss": 0.16629338264465332 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.4133000373840332, + "learning_rate": 2.952696086946598e-05, + "loss": 0.3466, + "step": 8580, + "teacher_loss": 0.3392438292503357 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.42583203315734863, + "learning_rate": 2.952639481356111e-05, + "loss": 0.3473, + "step": 8581, + "teacher_loss": 0.3385217785835266 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.3133378028869629, + "learning_rate": 2.952582842460956e-05, + "loss": 0.2694, + "step": 8582, + "teacher_loss": 0.26452380418777466 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.3498862683773041, + "learning_rate": 2.9525261702624316e-05, + "loss": 0.1953, + "step": 8583, + "teacher_loss": 0.1781367063522339 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.6899369955062866, + "learning_rate": 2.952469464761837e-05, + "loss": 0.2645, + "step": 8584, + "teacher_loss": 0.21727266907691956 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.3239683508872986, + "learning_rate": 2.9524127259604724e-05, + "loss": 0.2319, + "step": 8585, + "teacher_loss": 0.22169393301010132 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.4004390835762024, + "learning_rate": 2.9523559538596383e-05, + "loss": 0.2269, + "step": 8586, + "teacher_loss": 0.20760062336921692 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.6449419260025024, + "learning_rate": 2.952299148460637e-05, + "loss": 0.2802, + "step": 8587, + "teacher_loss": 0.2396230399608612 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.1920221447944641, + "learning_rate": 2.9522423097647696e-05, + "loss": 0.2104, + "step": 8588, + "teacher_loss": 0.21244044601917267 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.36759212613105774, + "learning_rate": 2.9521854377733408e-05, + "loss": 0.1968, + "step": 8589, + "teacher_loss": 0.17787227034568787 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.43617868423461914, + "learning_rate": 2.952128532487654e-05, + "loss": 0.2641, + "step": 8590, + "teacher_loss": 0.24499253928661346 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.5856689214706421, + "learning_rate": 2.9520715939090132e-05, + "loss": 0.355, + "step": 8591, + "teacher_loss": 0.329356849193573 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.2470080554485321, + "learning_rate": 2.9520146220387244e-05, + "loss": 0.2621, + "step": 8592, + "teacher_loss": 0.2637979984283447 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.3483297824859619, + "learning_rate": 2.9519576168780936e-05, + "loss": 0.183, + "step": 8593, + "teacher_loss": 0.1646513044834137 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.6476221680641174, + "learning_rate": 2.951900578428428e-05, + "loss": 0.3424, + "step": 8594, + "teacher_loss": 0.3084837794303894 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.4343809485435486, + "learning_rate": 2.951843506691035e-05, + "loss": 0.2517, + "step": 8595, + "teacher_loss": 0.23135775327682495 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.4594653844833374, + "learning_rate": 2.9517864016672233e-05, + "loss": 0.2655, + "step": 8596, + "teacher_loss": 0.24399425089359283 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.7300125360488892, + "learning_rate": 2.951729263358302e-05, + "loss": 0.3612, + "step": 8597, + "teacher_loss": 0.3201755881309509 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.5028871297836304, + "learning_rate": 2.9516720917655813e-05, + "loss": 0.3218, + "step": 8598, + "teacher_loss": 0.3016747832298279 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.36113494634628296, + "learning_rate": 2.951614886890372e-05, + "loss": 0.2666, + "step": 8599, + "teacher_loss": 0.2560886740684509 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.12891221046447754, + "learning_rate": 2.9515576487339854e-05, + "loss": 0.1842, + "step": 8600, + "teacher_loss": 0.19030898809432983 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.8552432060241699, + "learning_rate": 2.9515003772977337e-05, + "loss": 0.3132, + "step": 8601, + "teacher_loss": 0.25296127796173096 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.4133296012878418, + "learning_rate": 2.9514430725829304e-05, + "loss": 0.2334, + "step": 8602, + "teacher_loss": 0.2134297788143158 + }, + { + "compression_loss": 0.0, + "epoch": 1.55, + "label_loss": 0.5385497808456421, + "learning_rate": 2.951385734590889e-05, + "loss": 0.3226, + "step": 8603, + "teacher_loss": 0.29856711626052856 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.35052624344825745, + "learning_rate": 2.9513283633229245e-05, + "loss": 0.2253, + "step": 8604, + "teacher_loss": 0.2113742083311081 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.7931466102600098, + "learning_rate": 2.9512709587803515e-05, + "loss": 0.4882, + "step": 8605, + "teacher_loss": 0.4543372392654419 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.7294799089431763, + "learning_rate": 2.9512135209644867e-05, + "loss": 0.4756, + "step": 8606, + "teacher_loss": 0.4474000334739685 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.3064729571342468, + "learning_rate": 2.951156049876647e-05, + "loss": 0.2051, + "step": 8607, + "teacher_loss": 0.19379834830760956 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.3581099510192871, + "learning_rate": 2.9510985455181497e-05, + "loss": 0.292, + "step": 8608, + "teacher_loss": 0.28469568490982056 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.5751882195472717, + "learning_rate": 2.9510410078903134e-05, + "loss": 0.3178, + "step": 8609, + "teacher_loss": 0.28923600912094116 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.7669243812561035, + "learning_rate": 2.950983436994457e-05, + "loss": 0.4255, + "step": 8610, + "teacher_loss": 0.38756227493286133 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.38386228680610657, + "learning_rate": 2.950925832831901e-05, + "loss": 0.2894, + "step": 8611, + "teacher_loss": 0.27894073724746704 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.7692005634307861, + "learning_rate": 2.950868195403966e-05, + "loss": 0.3097, + "step": 8612, + "teacher_loss": 0.25859326124191284 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.5795905590057373, + "learning_rate": 2.9508105247119728e-05, + "loss": 0.2738, + "step": 8613, + "teacher_loss": 0.23978012800216675 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.3799271285533905, + "learning_rate": 2.950752820757244e-05, + "loss": 0.2276, + "step": 8614, + "teacher_loss": 0.21062520146369934 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.4844506084918976, + "learning_rate": 2.950695083541103e-05, + "loss": 0.2722, + "step": 8615, + "teacher_loss": 0.2486034631729126 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.4205459654331207, + "learning_rate": 2.9506373130648725e-05, + "loss": 0.1791, + "step": 8616, + "teacher_loss": 0.15226785838603973 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.5086053013801575, + "learning_rate": 2.950579509329878e-05, + "loss": 0.2174, + "step": 8617, + "teacher_loss": 0.18498878180980682 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.41754621267318726, + "learning_rate": 2.9505216723374442e-05, + "loss": 0.4361, + "step": 8618, + "teacher_loss": 0.4382110834121704 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.6171455383300781, + "learning_rate": 2.9504638020888977e-05, + "loss": 0.2053, + "step": 8619, + "teacher_loss": 0.15951156616210938 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.33646854758262634, + "learning_rate": 2.950405898585565e-05, + "loss": 0.2962, + "step": 8620, + "teacher_loss": 0.29176414012908936 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.41980230808258057, + "learning_rate": 2.950347961828773e-05, + "loss": 0.3017, + "step": 8621, + "teacher_loss": 0.2885623276233673 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.37618187069892883, + "learning_rate": 2.950289991819851e-05, + "loss": 0.2436, + "step": 8622, + "teacher_loss": 0.22891265153884888 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.9587826728820801, + "learning_rate": 2.9502319885601277e-05, + "loss": 0.4785, + "step": 8623, + "teacher_loss": 0.42515599727630615 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.458151638507843, + "learning_rate": 2.9501739520509328e-05, + "loss": 0.2018, + "step": 8624, + "teacher_loss": 0.17326241731643677 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.767421543598175, + "learning_rate": 2.950115882293597e-05, + "loss": 0.4138, + "step": 8625, + "teacher_loss": 0.37455683946609497 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.7027378082275391, + "learning_rate": 2.950057779289452e-05, + "loss": 0.2745, + "step": 8626, + "teacher_loss": 0.2269049882888794 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.5277151465415955, + "learning_rate": 2.9499996430398296e-05, + "loss": 0.185, + "step": 8627, + "teacher_loss": 0.14696623384952545 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.732877790927887, + "learning_rate": 2.9499414735460625e-05, + "loss": 0.7453, + "step": 8628, + "teacher_loss": 0.7467131614685059 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.6541773080825806, + "learning_rate": 2.9498832708094845e-05, + "loss": 0.3327, + "step": 8629, + "teacher_loss": 0.29699188470840454 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.3356805145740509, + "learning_rate": 2.9498250348314302e-05, + "loss": 0.1882, + "step": 8630, + "teacher_loss": 0.17180192470550537 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.5871535539627075, + "learning_rate": 2.9497667656132345e-05, + "loss": 0.2289, + "step": 8631, + "teacher_loss": 0.18904179334640503 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.33741694688796997, + "learning_rate": 2.9497084631562333e-05, + "loss": 0.2673, + "step": 8632, + "teacher_loss": 0.25950896739959717 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.2990621030330658, + "learning_rate": 2.949650127461764e-05, + "loss": 0.2196, + "step": 8633, + "teacher_loss": 0.21081838011741638 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.34680747985839844, + "learning_rate": 2.9495917585311635e-05, + "loss": 0.1839, + "step": 8634, + "teacher_loss": 0.1658465564250946 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.2762005925178528, + "learning_rate": 2.9495333563657698e-05, + "loss": 0.2558, + "step": 8635, + "teacher_loss": 0.253488153219223 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.29490700364112854, + "learning_rate": 2.949474920966922e-05, + "loss": 0.2907, + "step": 8636, + "teacher_loss": 0.29023826122283936 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.24283906817436218, + "learning_rate": 2.9494164523359606e-05, + "loss": 0.3005, + "step": 8637, + "teacher_loss": 0.3069021999835968 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.2504567503929138, + "learning_rate": 2.949357950474225e-05, + "loss": 0.3505, + "step": 8638, + "teacher_loss": 0.3616335093975067 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.3886566162109375, + "learning_rate": 2.9492994153830576e-05, + "loss": 0.3128, + "step": 8639, + "teacher_loss": 0.3043721616268158 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.5396826267242432, + "learning_rate": 2.949240847063799e-05, + "loss": 0.3215, + "step": 8640, + "teacher_loss": 0.2972286343574524 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.7378908395767212, + "learning_rate": 2.9491822455177936e-05, + "loss": 0.3676, + "step": 8641, + "teacher_loss": 0.3265003263950348 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.46372678875923157, + "learning_rate": 2.9491236107463837e-05, + "loss": 0.3135, + "step": 8642, + "teacher_loss": 0.29681527614593506 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.6142773628234863, + "learning_rate": 2.9490649427509142e-05, + "loss": 0.3107, + "step": 8643, + "teacher_loss": 0.27697986364364624 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.15381619334220886, + "learning_rate": 2.9490062415327305e-05, + "loss": 0.2601, + "step": 8644, + "teacher_loss": 0.27186357975006104 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.2013280689716339, + "learning_rate": 2.9489475070931777e-05, + "loss": 0.1788, + "step": 8645, + "teacher_loss": 0.17626135051250458 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.21912944316864014, + "learning_rate": 2.9488887394336025e-05, + "loss": 0.2175, + "step": 8646, + "teacher_loss": 0.21733978390693665 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.31355148553848267, + "learning_rate": 2.9488299385553525e-05, + "loss": 0.2795, + "step": 8647, + "teacher_loss": 0.2756844758987427 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.504398763179779, + "learning_rate": 2.9487711044597764e-05, + "loss": 0.3061, + "step": 8648, + "teacher_loss": 0.28407424688339233 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.2495500147342682, + "learning_rate": 2.9487122371482218e-05, + "loss": 0.2214, + "step": 8649, + "teacher_loss": 0.21832308173179626 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.41466641426086426, + "learning_rate": 2.9486533366220394e-05, + "loss": 0.2543, + "step": 8650, + "teacher_loss": 0.236490860581398 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.5795063972473145, + "learning_rate": 2.9485944028825794e-05, + "loss": 0.2645, + "step": 8651, + "teacher_loss": 0.22944766283035278 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.48607945442199707, + "learning_rate": 2.9485354359311927e-05, + "loss": 0.2294, + "step": 8652, + "teacher_loss": 0.20084503293037415 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.5190906524658203, + "learning_rate": 2.9484764357692318e-05, + "loss": 0.3855, + "step": 8653, + "teacher_loss": 0.3707018196582794 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.235230952501297, + "learning_rate": 2.9484174023980482e-05, + "loss": 0.233, + "step": 8654, + "teacher_loss": 0.23275862634181976 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.25842565298080444, + "learning_rate": 2.948358335818997e-05, + "loss": 0.1882, + "step": 8655, + "teacher_loss": 0.18038642406463623 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.39177829027175903, + "learning_rate": 2.948299236033431e-05, + "loss": 0.2503, + "step": 8656, + "teacher_loss": 0.2345782071352005 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.3635066747665405, + "learning_rate": 2.9482401030427057e-05, + "loss": 0.2271, + "step": 8657, + "teacher_loss": 0.21193882822990417 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.953407883644104, + "learning_rate": 2.9481809368481772e-05, + "loss": 0.4145, + "step": 8658, + "teacher_loss": 0.3545989990234375 + }, + { + "compression_loss": 0.0, + "epoch": 1.56, + "label_loss": 0.44426149129867554, + "learning_rate": 2.9481217374512014e-05, + "loss": 0.2011, + "step": 8659, + "teacher_loss": 0.17405922710895538 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.7031523585319519, + "learning_rate": 2.948062504853136e-05, + "loss": 0.4002, + "step": 8660, + "teacher_loss": 0.3665161728858948 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.8205397725105286, + "learning_rate": 2.948003239055339e-05, + "loss": 0.3741, + "step": 8661, + "teacher_loss": 0.3244902491569519 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.2737279534339905, + "learning_rate": 2.947943940059169e-05, + "loss": 0.2798, + "step": 8662, + "teacher_loss": 0.2804777920246124 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.515887975692749, + "learning_rate": 2.9478846078659856e-05, + "loss": 0.3231, + "step": 8663, + "teacher_loss": 0.301708847284317 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.7884516716003418, + "learning_rate": 2.947825242477149e-05, + "loss": 0.4601, + "step": 8664, + "teacher_loss": 0.42365455627441406 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.6113412380218506, + "learning_rate": 2.9477658438940204e-05, + "loss": 0.3117, + "step": 8665, + "teacher_loss": 0.2784503102302551 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.3069119453430176, + "learning_rate": 2.9477064121179618e-05, + "loss": 0.2648, + "step": 8666, + "teacher_loss": 0.26009976863861084 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.7723685503005981, + "learning_rate": 2.9476469471503357e-05, + "loss": 0.3094, + "step": 8667, + "teacher_loss": 0.25794416666030884 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.390103280544281, + "learning_rate": 2.9475874489925052e-05, + "loss": 0.2345, + "step": 8668, + "teacher_loss": 0.21724724769592285 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.3343648314476013, + "learning_rate": 2.9475279176458343e-05, + "loss": 0.2503, + "step": 8669, + "teacher_loss": 0.24100151658058167 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.27346912026405334, + "learning_rate": 2.9474683531116886e-05, + "loss": 0.2282, + "step": 8670, + "teacher_loss": 0.22317636013031006 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.43901297450065613, + "learning_rate": 2.947408755391433e-05, + "loss": 0.3392, + "step": 8671, + "teacher_loss": 0.32808756828308105 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.4289487600326538, + "learning_rate": 2.9473491244864347e-05, + "loss": 0.2616, + "step": 8672, + "teacher_loss": 0.2430020123720169 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.695624053478241, + "learning_rate": 2.94728946039806e-05, + "loss": 0.4484, + "step": 8673, + "teacher_loss": 0.420968234539032 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.2031000256538391, + "learning_rate": 2.947229763127677e-05, + "loss": 0.2004, + "step": 8674, + "teacher_loss": 0.2000676393508911 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.47121721506118774, + "learning_rate": 2.947170032676655e-05, + "loss": 0.3252, + "step": 8675, + "teacher_loss": 0.30897989869117737 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 1.1334383487701416, + "learning_rate": 2.9471102690463625e-05, + "loss": 0.8145, + "step": 8676, + "teacher_loss": 0.7790335416793823 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.8293523788452148, + "learning_rate": 2.94705047223817e-05, + "loss": 0.3816, + "step": 8677, + "teacher_loss": 0.33182045817375183 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.509623110294342, + "learning_rate": 2.9469906422534495e-05, + "loss": 0.3289, + "step": 8678, + "teacher_loss": 0.3087852895259857 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.7502414584159851, + "learning_rate": 2.9469307790935712e-05, + "loss": 0.3968, + "step": 8679, + "teacher_loss": 0.35755759477615356 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.4372464120388031, + "learning_rate": 2.9468708827599087e-05, + "loss": 0.2328, + "step": 8680, + "teacher_loss": 0.21011799573898315 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.7151311635971069, + "learning_rate": 2.9468109532538346e-05, + "loss": 0.2722, + "step": 8681, + "teacher_loss": 0.2229982167482376 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.6397551894187927, + "learning_rate": 2.9467509905767228e-05, + "loss": 0.2399, + "step": 8682, + "teacher_loss": 0.19542112946510315 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 1.0269132852554321, + "learning_rate": 2.946690994729949e-05, + "loss": 1.0594, + "step": 8683, + "teacher_loss": 1.06304931640625 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.49443960189819336, + "learning_rate": 2.9466309657148876e-05, + "loss": 0.3232, + "step": 8684, + "teacher_loss": 0.3041972517967224 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.48493123054504395, + "learning_rate": 2.9465709035329157e-05, + "loss": 0.4258, + "step": 8685, + "teacher_loss": 0.41920238733291626 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.37493348121643066, + "learning_rate": 2.9465108081854098e-05, + "loss": 0.2559, + "step": 8686, + "teacher_loss": 0.24265140295028687 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.22569403052330017, + "learning_rate": 2.9464506796737478e-05, + "loss": 0.2537, + "step": 8687, + "teacher_loss": 0.2567977011203766 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.6183544397354126, + "learning_rate": 2.9463905179993086e-05, + "loss": 0.2689, + "step": 8688, + "teacher_loss": 0.23011058568954468 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.6737602353096008, + "learning_rate": 2.9463303231634717e-05, + "loss": 0.4056, + "step": 8689, + "teacher_loss": 0.37575221061706543 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.27392086386680603, + "learning_rate": 2.946270095167616e-05, + "loss": 0.2009, + "step": 8690, + "teacher_loss": 0.1928154081106186 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.43287795782089233, + "learning_rate": 2.9462098340131238e-05, + "loss": 0.194, + "step": 8691, + "teacher_loss": 0.16751116514205933 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.674911618232727, + "learning_rate": 2.9461495397013757e-05, + "loss": 0.3927, + "step": 8692, + "teacher_loss": 0.36130642890930176 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 1.1586272716522217, + "learning_rate": 2.946089212233755e-05, + "loss": 0.9276, + "step": 8693, + "teacher_loss": 0.9018793702125549 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.5047844052314758, + "learning_rate": 2.9460288516116437e-05, + "loss": 0.2565, + "step": 8694, + "teacher_loss": 0.22893774509429932 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.9601619839668274, + "learning_rate": 2.9459684578364262e-05, + "loss": 0.811, + "step": 8695, + "teacher_loss": 0.7943848371505737 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.16960570216178894, + "learning_rate": 2.9459080309094878e-05, + "loss": 0.1898, + "step": 8696, + "teacher_loss": 0.19198976457118988 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.361625075340271, + "learning_rate": 2.9458475708322128e-05, + "loss": 0.2546, + "step": 8697, + "teacher_loss": 0.24269789457321167 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.3513258099555969, + "learning_rate": 2.945787077605988e-05, + "loss": 0.2637, + "step": 8698, + "teacher_loss": 0.2539609968662262 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.3751927614212036, + "learning_rate": 2.9457265512322004e-05, + "loss": 0.2585, + "step": 8699, + "teacher_loss": 0.2455640733242035 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.17260484397411346, + "learning_rate": 2.9456659917122372e-05, + "loss": 0.1817, + "step": 8700, + "teacher_loss": 0.1826876401901245 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.33278852701187134, + "learning_rate": 2.9456053990474872e-05, + "loss": 0.2075, + "step": 8701, + "teacher_loss": 0.19360914826393127 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.18315088748931885, + "learning_rate": 2.9455447732393395e-05, + "loss": 0.2115, + "step": 8702, + "teacher_loss": 0.21463073790073395 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.6013479232788086, + "learning_rate": 2.9454841142891846e-05, + "loss": 0.2516, + "step": 8703, + "teacher_loss": 0.21268539130687714 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.2560952603816986, + "learning_rate": 2.9454234221984123e-05, + "loss": 0.1683, + "step": 8704, + "teacher_loss": 0.15859296917915344 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.4482208788394928, + "learning_rate": 2.9453626969684145e-05, + "loss": 0.2242, + "step": 8705, + "teacher_loss": 0.1993495523929596 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.5013689994812012, + "learning_rate": 2.9453019386005836e-05, + "loss": 0.2748, + "step": 8706, + "teacher_loss": 0.24963030219078064 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.7967034578323364, + "learning_rate": 2.9452411470963122e-05, + "loss": 0.6523, + "step": 8707, + "teacher_loss": 0.6362035274505615 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.43818965554237366, + "learning_rate": 2.9451803224569952e-05, + "loss": 0.2722, + "step": 8708, + "teacher_loss": 0.2537902295589447 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.26041287183761597, + "learning_rate": 2.9451194646840255e-05, + "loss": 0.1915, + "step": 8709, + "teacher_loss": 0.1838574856519699 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.8815737962722778, + "learning_rate": 2.9450585737787996e-05, + "loss": 0.3609, + "step": 8710, + "teacher_loss": 0.30306780338287354 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 1.0208110809326172, + "learning_rate": 2.9449976497427127e-05, + "loss": 0.5073, + "step": 8711, + "teacher_loss": 0.45028334856033325 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.5190050601959229, + "learning_rate": 2.9449366925771622e-05, + "loss": 0.3359, + "step": 8712, + "teacher_loss": 0.31552836298942566 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.501338541507721, + "learning_rate": 2.9448757022835453e-05, + "loss": 0.2748, + "step": 8713, + "teacher_loss": 0.24967384338378906 + }, + { + "compression_loss": 0.0, + "epoch": 1.57, + "label_loss": 0.5927071571350098, + "learning_rate": 2.9448146788632607e-05, + "loss": 0.2387, + "step": 8714, + "teacher_loss": 0.19934552907943726 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.4255698025226593, + "learning_rate": 2.9447536223177072e-05, + "loss": 0.3575, + "step": 8715, + "teacher_loss": 0.34990426898002625 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.376946359872818, + "learning_rate": 2.944692532648285e-05, + "loss": 0.3571, + "step": 8716, + "teacher_loss": 0.35493841767311096 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.22233924269676208, + "learning_rate": 2.944631409856394e-05, + "loss": 0.2113, + "step": 8717, + "teacher_loss": 0.210092693567276 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.2584851384162903, + "learning_rate": 2.9445702539434363e-05, + "loss": 0.3611, + "step": 8718, + "teacher_loss": 0.3724518418312073 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 1.0913103818893433, + "learning_rate": 2.9445090649108137e-05, + "loss": 0.3637, + "step": 8719, + "teacher_loss": 0.2828175127506256 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.6493569612503052, + "learning_rate": 2.9444478427599293e-05, + "loss": 0.5667, + "step": 8720, + "teacher_loss": 0.5574923753738403 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.42301419377326965, + "learning_rate": 2.9443865874921865e-05, + "loss": 0.3224, + "step": 8721, + "teacher_loss": 0.3112179636955261 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.6200509071350098, + "learning_rate": 2.9443252991089892e-05, + "loss": 0.3029, + "step": 8722, + "teacher_loss": 0.26768988370895386 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.38496601581573486, + "learning_rate": 2.9442639776117436e-05, + "loss": 0.314, + "step": 8723, + "teacher_loss": 0.30615168809890747 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.489969402551651, + "learning_rate": 2.9442026230018554e-05, + "loss": 0.3517, + "step": 8724, + "teacher_loss": 0.3362851142883301 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.26782041788101196, + "learning_rate": 2.9441412352807304e-05, + "loss": 0.2607, + "step": 8725, + "teacher_loss": 0.2599547207355499 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.5229651927947998, + "learning_rate": 2.944079814449777e-05, + "loss": 0.2216, + "step": 8726, + "teacher_loss": 0.18807940185070038 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.617690920829773, + "learning_rate": 2.9440183605104027e-05, + "loss": 0.4033, + "step": 8727, + "teacher_loss": 0.3794995844364166 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.5342797636985779, + "learning_rate": 2.943956873464017e-05, + "loss": 0.3394, + "step": 8728, + "teacher_loss": 0.3177253007888794 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.37761110067367554, + "learning_rate": 2.9438953533120293e-05, + "loss": 0.2774, + "step": 8729, + "teacher_loss": 0.266218900680542 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.35491907596588135, + "learning_rate": 2.9438338000558503e-05, + "loss": 0.2331, + "step": 8730, + "teacher_loss": 0.2195407748222351 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.3313908874988556, + "learning_rate": 2.943772213696891e-05, + "loss": 0.3137, + "step": 8731, + "teacher_loss": 0.3117339313030243 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.5769060254096985, + "learning_rate": 2.943710594236563e-05, + "loss": 0.3104, + "step": 8732, + "teacher_loss": 0.2808319330215454 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.523478090763092, + "learning_rate": 2.9436489416762807e-05, + "loss": 0.2641, + "step": 8733, + "teacher_loss": 0.2352854609489441 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 1.2077131271362305, + "learning_rate": 2.9435872560174557e-05, + "loss": 0.8056, + "step": 8734, + "teacher_loss": 0.7609001398086548 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.21983544528484344, + "learning_rate": 2.943525537261503e-05, + "loss": 0.2328, + "step": 8735, + "teacher_loss": 0.23427045345306396 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.3821295499801636, + "learning_rate": 2.9434637854098377e-05, + "loss": 0.2829, + "step": 8736, + "teacher_loss": 0.27187561988830566 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.41544967889785767, + "learning_rate": 2.9434020004638757e-05, + "loss": 0.1965, + "step": 8737, + "teacher_loss": 0.17214302718639374 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.6012518405914307, + "learning_rate": 2.9433401824250334e-05, + "loss": 0.2176, + "step": 8738, + "teacher_loss": 0.1749608814716339 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.3901960849761963, + "learning_rate": 2.943278331294728e-05, + "loss": 0.2576, + "step": 8739, + "teacher_loss": 0.24285341799259186 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.3115026652812958, + "learning_rate": 2.9432164470743776e-05, + "loss": 0.1959, + "step": 8740, + "teacher_loss": 0.1830420047044754 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.10500791668891907, + "learning_rate": 2.943154529765401e-05, + "loss": 0.2311, + "step": 8741, + "teacher_loss": 0.24511590600013733 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.3917456865310669, + "learning_rate": 2.9430925793692177e-05, + "loss": 0.1838, + "step": 8742, + "teacher_loss": 0.16064569354057312 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.3375360667705536, + "learning_rate": 2.9430305958872483e-05, + "loss": 0.2094, + "step": 8743, + "teacher_loss": 0.19510847330093384 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.7353867292404175, + "learning_rate": 2.942968579320914e-05, + "loss": 0.2872, + "step": 8744, + "teacher_loss": 0.23743407428264618 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.45641446113586426, + "learning_rate": 2.9429065296716363e-05, + "loss": 0.2682, + "step": 8745, + "teacher_loss": 0.2472875863313675 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.5614002346992493, + "learning_rate": 2.9428444469408375e-05, + "loss": 0.2358, + "step": 8746, + "teacher_loss": 0.1995747834444046 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.17120251059532166, + "learning_rate": 2.942782331129942e-05, + "loss": 0.2027, + "step": 8747, + "teacher_loss": 0.2061944305896759 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.28657540678977966, + "learning_rate": 2.9427201822403734e-05, + "loss": 0.2689, + "step": 8748, + "teacher_loss": 0.26695016026496887 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.26850274205207825, + "learning_rate": 2.942658000273556e-05, + "loss": 0.2922, + "step": 8749, + "teacher_loss": 0.2947887182235718 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.40771299600601196, + "learning_rate": 2.942595785230916e-05, + "loss": 0.2184, + "step": 8750, + "teacher_loss": 0.19737279415130615 + }, + { + "epoch": 1.58, + "eval_exact_match": 79.29990539262063, + "eval_f1": 86.90499520309895, + "step": 8750 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.6159347295761108, + "learning_rate": 2.9425335371138802e-05, + "loss": 0.2751, + "step": 8751, + "teacher_loss": 0.23721975088119507 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.41514527797698975, + "learning_rate": 2.942471255923875e-05, + "loss": 0.2699, + "step": 8752, + "teacher_loss": 0.2537892460823059 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.4908739924430847, + "learning_rate": 2.942408941662329e-05, + "loss": 0.2353, + "step": 8753, + "teacher_loss": 0.20689064264297485 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.483184814453125, + "learning_rate": 2.9423465943306703e-05, + "loss": 0.2318, + "step": 8754, + "teacher_loss": 0.2039155662059784 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.6774777770042419, + "learning_rate": 2.9422842139303283e-05, + "loss": 0.223, + "step": 8755, + "teacher_loss": 0.172515869140625 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.29104936122894287, + "learning_rate": 2.9422218004627338e-05, + "loss": 0.2397, + "step": 8756, + "teacher_loss": 0.23404385149478912 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.4680928885936737, + "learning_rate": 2.9421593539293173e-05, + "loss": 0.2057, + "step": 8757, + "teacher_loss": 0.1765434741973877 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.4866985082626343, + "learning_rate": 2.942096874331511e-05, + "loss": 0.2151, + "step": 8758, + "teacher_loss": 0.18488512933254242 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.6421576738357544, + "learning_rate": 2.9420343616707465e-05, + "loss": 0.3877, + "step": 8759, + "teacher_loss": 0.3594168424606323 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.41616418957710266, + "learning_rate": 2.941971815948458e-05, + "loss": 0.259, + "step": 8760, + "teacher_loss": 0.2415849268436432 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.6059359312057495, + "learning_rate": 2.9419092371660784e-05, + "loss": 0.5688, + "step": 8761, + "teacher_loss": 0.5646283626556396 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.6384348273277283, + "learning_rate": 2.9418466253250434e-05, + "loss": 0.3895, + "step": 8762, + "teacher_loss": 0.36187058687210083 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.13635368645191193, + "learning_rate": 2.941783980426788e-05, + "loss": 0.1427, + "step": 8763, + "teacher_loss": 0.14343036711215973 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.5273498296737671, + "learning_rate": 2.941721302472749e-05, + "loss": 0.4556, + "step": 8764, + "teacher_loss": 0.4476405382156372 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.3698505163192749, + "learning_rate": 2.9416585914643627e-05, + "loss": 0.2754, + "step": 8765, + "teacher_loss": 0.26494157314300537 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.23717889189720154, + "learning_rate": 2.9415958474030675e-05, + "loss": 0.2968, + "step": 8766, + "teacher_loss": 0.303442120552063 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.21350878477096558, + "learning_rate": 2.9415330702903015e-05, + "loss": 0.1652, + "step": 8767, + "teacher_loss": 0.15985023975372314 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.3323150873184204, + "learning_rate": 2.941470260127504e-05, + "loss": 0.2343, + "step": 8768, + "teacher_loss": 0.22344408929347992 + }, + { + "compression_loss": 0.0, + "epoch": 1.58, + "label_loss": 0.29647210240364075, + "learning_rate": 2.9414074169161152e-05, + "loss": 0.2225, + "step": 8769, + "teacher_loss": 0.21433106064796448 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.378919780254364, + "learning_rate": 2.9413445406575762e-05, + "loss": 0.3768, + "step": 8770, + "teacher_loss": 0.37657803297042847 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.17018666863441467, + "learning_rate": 2.941281631353328e-05, + "loss": 0.1868, + "step": 8771, + "teacher_loss": 0.18868331611156464 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.21196800470352173, + "learning_rate": 2.941218689004813e-05, + "loss": 0.3198, + "step": 8772, + "teacher_loss": 0.33178332448005676 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.5520363450050354, + "learning_rate": 2.941155713613475e-05, + "loss": 0.338, + "step": 8773, + "teacher_loss": 0.3142227232456207 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.31486591696739197, + "learning_rate": 2.9410927051807568e-05, + "loss": 0.3105, + "step": 8774, + "teacher_loss": 0.3100135326385498 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.45442333817481995, + "learning_rate": 2.9410296637081036e-05, + "loss": 0.2527, + "step": 8775, + "teacher_loss": 0.23032473027706146 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.8308318257331848, + "learning_rate": 2.9409665891969612e-05, + "loss": 0.32, + "step": 8776, + "teacher_loss": 0.26328548789024353 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.7619255781173706, + "learning_rate": 2.9409034816487745e-05, + "loss": 0.3055, + "step": 8777, + "teacher_loss": 0.25475484132766724 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.46176600456237793, + "learning_rate": 2.940840341064991e-05, + "loss": 0.2804, + "step": 8778, + "teacher_loss": 0.2602023780345917 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.29514408111572266, + "learning_rate": 2.9407771674470585e-05, + "loss": 0.2513, + "step": 8779, + "teacher_loss": 0.24644513428211212 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.38248908519744873, + "learning_rate": 2.9407139607964256e-05, + "loss": 0.2957, + "step": 8780, + "teacher_loss": 0.28605979681015015 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.29497385025024414, + "learning_rate": 2.9406507211145405e-05, + "loss": 0.2376, + "step": 8781, + "teacher_loss": 0.23122447729110718 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.3572332262992859, + "learning_rate": 2.9405874484028536e-05, + "loss": 0.3327, + "step": 8782, + "teacher_loss": 0.3300093412399292 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.3291904330253601, + "learning_rate": 2.940524142662816e-05, + "loss": 0.2485, + "step": 8783, + "teacher_loss": 0.23957999050617218 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.8445284366607666, + "learning_rate": 2.940460803895879e-05, + "loss": 0.362, + "step": 8784, + "teacher_loss": 0.3083879351615906 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.946256160736084, + "learning_rate": 2.9403974321034937e-05, + "loss": 0.5835, + "step": 8785, + "teacher_loss": 0.5431619882583618 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.40027478337287903, + "learning_rate": 2.9403340272871142e-05, + "loss": 0.305, + "step": 8786, + "teacher_loss": 0.2943701148033142 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.4661625921726227, + "learning_rate": 2.940270589448194e-05, + "loss": 0.2734, + "step": 8787, + "teacher_loss": 0.2520264685153961 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.8481431603431702, + "learning_rate": 2.940207118588187e-05, + "loss": 0.5574, + "step": 8788, + "teacher_loss": 0.5250433683395386 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.4612758159637451, + "learning_rate": 2.940143614708549e-05, + "loss": 0.2386, + "step": 8789, + "teacher_loss": 0.21381625533103943 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.6391294002532959, + "learning_rate": 2.9400800778107357e-05, + "loss": 0.3985, + "step": 8790, + "teacher_loss": 0.3717923164367676 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.5397456288337708, + "learning_rate": 2.9400165078962035e-05, + "loss": 0.2853, + "step": 8791, + "teacher_loss": 0.2570365071296692 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.44662588834762573, + "learning_rate": 2.9399529049664104e-05, + "loss": 0.3602, + "step": 8792, + "teacher_loss": 0.3506399095058441 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.2950704097747803, + "learning_rate": 2.9398892690228147e-05, + "loss": 0.1975, + "step": 8793, + "teacher_loss": 0.1866893470287323 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.2504015564918518, + "learning_rate": 2.9398256000668745e-05, + "loss": 0.1922, + "step": 8794, + "teacher_loss": 0.1857699751853943 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.30887988209724426, + "learning_rate": 2.9397618981000502e-05, + "loss": 0.2637, + "step": 8795, + "teacher_loss": 0.25866103172302246 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.12059152126312256, + "learning_rate": 2.9396981631238024e-05, + "loss": 0.1736, + "step": 8796, + "teacher_loss": 0.179483100771904 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.6224748492240906, + "learning_rate": 2.9396343951395923e-05, + "loss": 0.3494, + "step": 8797, + "teacher_loss": 0.3190913200378418 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.13256776332855225, + "learning_rate": 2.9395705941488814e-05, + "loss": 0.1653, + "step": 8798, + "teacher_loss": 0.1689566671848297 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.35983216762542725, + "learning_rate": 2.939506760153133e-05, + "loss": 0.2239, + "step": 8799, + "teacher_loss": 0.20875108242034912 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.4718207120895386, + "learning_rate": 2.93944289315381e-05, + "loss": 0.4454, + "step": 8800, + "teacher_loss": 0.44250959157943726 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.21875980496406555, + "learning_rate": 2.939378993152378e-05, + "loss": 0.2277, + "step": 8801, + "teacher_loss": 0.22866126894950867 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.6279621124267578, + "learning_rate": 2.939315060150301e-05, + "loss": 0.3313, + "step": 8802, + "teacher_loss": 0.2983255088329315 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.21201640367507935, + "learning_rate": 2.9392510941490444e-05, + "loss": 0.1893, + "step": 8803, + "teacher_loss": 0.1867557317018509 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.26410508155822754, + "learning_rate": 2.9391870951500757e-05, + "loss": 0.2153, + "step": 8804, + "teacher_loss": 0.20991788804531097 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.326290100812912, + "learning_rate": 2.939123063154862e-05, + "loss": 0.2197, + "step": 8805, + "teacher_loss": 0.2078983634710312 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.332580029964447, + "learning_rate": 2.9390589981648708e-05, + "loss": 0.3575, + "step": 8806, + "teacher_loss": 0.36025679111480713 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.527552604675293, + "learning_rate": 2.9389949001815712e-05, + "loss": 0.1968, + "step": 8807, + "teacher_loss": 0.16002824902534485 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.5263674259185791, + "learning_rate": 2.938930769206433e-05, + "loss": 0.3668, + "step": 8808, + "teacher_loss": 0.3490758538246155 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.3165118098258972, + "learning_rate": 2.9388666052409268e-05, + "loss": 0.1799, + "step": 8809, + "teacher_loss": 0.16470378637313843 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.31869256496429443, + "learning_rate": 2.938802408286523e-05, + "loss": 0.2042, + "step": 8810, + "teacher_loss": 0.19147758185863495 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.4055965542793274, + "learning_rate": 2.9387381783446937e-05, + "loss": 0.2503, + "step": 8811, + "teacher_loss": 0.23299652338027954 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.5883247256278992, + "learning_rate": 2.9386739154169115e-05, + "loss": 0.2993, + "step": 8812, + "teacher_loss": 0.267169713973999 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.621792733669281, + "learning_rate": 2.9386096195046496e-05, + "loss": 0.3092, + "step": 8813, + "teacher_loss": 0.2744479179382324 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.17292936146259308, + "learning_rate": 2.9385452906093828e-05, + "loss": 0.2174, + "step": 8814, + "teacher_loss": 0.2223585844039917 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.7421229481697083, + "learning_rate": 2.938480928732585e-05, + "loss": 0.3258, + "step": 8815, + "teacher_loss": 0.2794947624206543 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.5212373733520508, + "learning_rate": 2.938416533875733e-05, + "loss": 0.3001, + "step": 8816, + "teacher_loss": 0.27547842264175415 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.44826480746269226, + "learning_rate": 2.9383521060403012e-05, + "loss": 0.2851, + "step": 8817, + "teacher_loss": 0.26698511838912964 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.4052944779396057, + "learning_rate": 2.9382876452277688e-05, + "loss": 0.2459, + "step": 8818, + "teacher_loss": 0.22823883593082428 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.3844138979911804, + "learning_rate": 2.938223151439613e-05, + "loss": 0.1753, + "step": 8819, + "teacher_loss": 0.1520141065120697 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.3801078796386719, + "learning_rate": 2.9381586246773124e-05, + "loss": 0.1691, + "step": 8820, + "teacher_loss": 0.14569216966629028 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.566926896572113, + "learning_rate": 2.9380940649423462e-05, + "loss": 0.2326, + "step": 8821, + "teacher_loss": 0.1954750120639801 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.11366164684295654, + "learning_rate": 2.9380294722361943e-05, + "loss": 0.1464, + "step": 8822, + "teacher_loss": 0.15003632009029388 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 1.1682119369506836, + "learning_rate": 2.937964846560339e-05, + "loss": 0.3679, + "step": 8823, + "teacher_loss": 0.27901265025138855 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 0.38171201944351196, + "learning_rate": 2.93790018791626e-05, + "loss": 0.3226, + "step": 8824, + "teacher_loss": 0.3160545825958252 + }, + { + "compression_loss": 0.0, + "epoch": 1.59, + "label_loss": 1.2082462310791016, + "learning_rate": 2.9378354963054412e-05, + "loss": 0.5746, + "step": 8825, + "teacher_loss": 0.5041833519935608 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.3155685067176819, + "learning_rate": 2.9377707717293657e-05, + "loss": 0.2753, + "step": 8826, + "teacher_loss": 0.27086973190307617 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.5268188118934631, + "learning_rate": 2.9377060141895167e-05, + "loss": 0.2459, + "step": 8827, + "teacher_loss": 0.2146688848733902 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.6934584379196167, + "learning_rate": 2.9376412236873792e-05, + "loss": 0.2195, + "step": 8828, + "teacher_loss": 0.16678529977798462 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.5331879258155823, + "learning_rate": 2.9375764002244386e-05, + "loss": 0.3056, + "step": 8829, + "teacher_loss": 0.2802583575248718 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.23903435468673706, + "learning_rate": 2.9375115438021815e-05, + "loss": 0.2087, + "step": 8830, + "teacher_loss": 0.2053188532590866 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.5347850322723389, + "learning_rate": 2.9374466544220947e-05, + "loss": 0.3201, + "step": 8831, + "teacher_loss": 0.29626569151878357 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.56703782081604, + "learning_rate": 2.937381732085665e-05, + "loss": 0.3143, + "step": 8832, + "teacher_loss": 0.2862128019332886 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.6218544244766235, + "learning_rate": 2.9373167767943826e-05, + "loss": 0.2971, + "step": 8833, + "teacher_loss": 0.26097577810287476 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.27498817443847656, + "learning_rate": 2.9372517885497357e-05, + "loss": 0.3458, + "step": 8834, + "teacher_loss": 0.353631854057312 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.3907712697982788, + "learning_rate": 2.937186767353214e-05, + "loss": 0.3253, + "step": 8835, + "teacher_loss": 0.31804704666137695 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.6646755337715149, + "learning_rate": 2.9371217132063086e-05, + "loss": 0.2978, + "step": 8836, + "teacher_loss": 0.2570681869983673 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.6205227375030518, + "learning_rate": 2.9370566261105113e-05, + "loss": 0.2011, + "step": 8837, + "teacher_loss": 0.1545419991016388 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.18541185557842255, + "learning_rate": 2.936991506067314e-05, + "loss": 0.2024, + "step": 8838, + "teacher_loss": 0.2042592465877533 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.8314276933670044, + "learning_rate": 2.9369263530782096e-05, + "loss": 0.4272, + "step": 8839, + "teacher_loss": 0.38223615288734436 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.32398173213005066, + "learning_rate": 2.9368611671446923e-05, + "loss": 0.2614, + "step": 8840, + "teacher_loss": 0.25439882278442383 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.5677398443222046, + "learning_rate": 2.9367959482682564e-05, + "loss": 0.3202, + "step": 8841, + "teacher_loss": 0.29271644353866577 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.49968668818473816, + "learning_rate": 2.9367306964503968e-05, + "loss": 0.3073, + "step": 8842, + "teacher_loss": 0.28591781854629517 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.502788245677948, + "learning_rate": 2.93666541169261e-05, + "loss": 0.3987, + "step": 8843, + "teacher_loss": 0.38716983795166016 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.662237286567688, + "learning_rate": 2.936600093996393e-05, + "loss": 0.5754, + "step": 8844, + "teacher_loss": 0.5657215118408203 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.6181904077529907, + "learning_rate": 2.936534743363243e-05, + "loss": 0.3062, + "step": 8845, + "teacher_loss": 0.27153658866882324 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.8824832439422607, + "learning_rate": 2.9364693597946583e-05, + "loss": 0.3706, + "step": 8846, + "teacher_loss": 0.31376832723617554 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.8539354205131531, + "learning_rate": 2.9364039432921374e-05, + "loss": 0.3004, + "step": 8847, + "teacher_loss": 0.23885369300842285 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.583228349685669, + "learning_rate": 2.936338493857181e-05, + "loss": 0.2084, + "step": 8848, + "teacher_loss": 0.16680654883384705 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.41357293725013733, + "learning_rate": 2.9362730114912892e-05, + "loss": 0.243, + "step": 8849, + "teacher_loss": 0.2240189164876938 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.21565717458724976, + "learning_rate": 2.9362074961959635e-05, + "loss": 0.2025, + "step": 8850, + "teacher_loss": 0.2010529637336731 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.6318279504776001, + "learning_rate": 2.936141947972706e-05, + "loss": 0.2638, + "step": 8851, + "teacher_loss": 0.22289103269577026 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.4330080449581146, + "learning_rate": 2.936076366823019e-05, + "loss": 0.3472, + "step": 8852, + "teacher_loss": 0.33768126368522644 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.43628689646720886, + "learning_rate": 2.936010752748407e-05, + "loss": 0.3422, + "step": 8853, + "teacher_loss": 0.3317187428474426 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.5459315776824951, + "learning_rate": 2.9359451057503734e-05, + "loss": 0.2907, + "step": 8854, + "teacher_loss": 0.262288361787796 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 1.1229536533355713, + "learning_rate": 2.9358794258304237e-05, + "loss": 0.3669, + "step": 8855, + "teacher_loss": 0.2828805446624756 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.5496934056282043, + "learning_rate": 2.9358137129900638e-05, + "loss": 0.4022, + "step": 8856, + "teacher_loss": 0.38576555252075195 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.6932355761528015, + "learning_rate": 2.9357479672308005e-05, + "loss": 0.323, + "step": 8857, + "teacher_loss": 0.2818843126296997 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.22394317388534546, + "learning_rate": 2.935682188554141e-05, + "loss": 0.2305, + "step": 8858, + "teacher_loss": 0.23123431205749512 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.30514782667160034, + "learning_rate": 2.935616376961593e-05, + "loss": 0.2555, + "step": 8859, + "teacher_loss": 0.24992826581001282 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.8922151327133179, + "learning_rate": 2.9355505324546658e-05, + "loss": 0.3217, + "step": 8860, + "teacher_loss": 0.2582681179046631 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.8770116567611694, + "learning_rate": 2.9354846550348685e-05, + "loss": 0.7195, + "step": 8861, + "teacher_loss": 0.7020071744918823 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.823535680770874, + "learning_rate": 2.9354187447037124e-05, + "loss": 0.3124, + "step": 8862, + "teacher_loss": 0.25558415055274963 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.6106618046760559, + "learning_rate": 2.935352801462708e-05, + "loss": 0.2767, + "step": 8863, + "teacher_loss": 0.23955821990966797 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.5355809330940247, + "learning_rate": 2.9352868253133676e-05, + "loss": 0.2556, + "step": 8864, + "teacher_loss": 0.22453457117080688 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.5119600296020508, + "learning_rate": 2.9352208162572028e-05, + "loss": 0.3949, + "step": 8865, + "teacher_loss": 0.3818776309490204 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.24631869792938232, + "learning_rate": 2.9351547742957284e-05, + "loss": 0.2712, + "step": 8866, + "teacher_loss": 0.27391621470451355 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.1506839394569397, + "learning_rate": 2.9350886994304572e-05, + "loss": 0.2215, + "step": 8867, + "teacher_loss": 0.22936293482780457 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.5472217202186584, + "learning_rate": 2.935022591662905e-05, + "loss": 0.2448, + "step": 8868, + "teacher_loss": 0.21124999225139618 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.23334720730781555, + "learning_rate": 2.9349564509945875e-05, + "loss": 0.3058, + "step": 8869, + "teacher_loss": 0.31389400362968445 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.5410623550415039, + "learning_rate": 2.9348902774270203e-05, + "loss": 0.3162, + "step": 8870, + "teacher_loss": 0.29125505685806274 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.5699049830436707, + "learning_rate": 2.934824070961721e-05, + "loss": 0.3406, + "step": 8871, + "teacher_loss": 0.31508421897888184 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.38524335622787476, + "learning_rate": 2.934757831600208e-05, + "loss": 0.3268, + "step": 8872, + "teacher_loss": 0.3203461766242981 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.7102386951446533, + "learning_rate": 2.934691559343999e-05, + "loss": 0.3387, + "step": 8873, + "teacher_loss": 0.2974720895290375 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.4853948950767517, + "learning_rate": 2.934625254194615e-05, + "loss": 0.2747, + "step": 8874, + "teacher_loss": 0.2513313293457031 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.7584381699562073, + "learning_rate": 2.9345589161535745e-05, + "loss": 0.3005, + "step": 8875, + "teacher_loss": 0.24961210787296295 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.27399343252182007, + "learning_rate": 2.9344925452223988e-05, + "loss": 0.2716, + "step": 8876, + "teacher_loss": 0.2713875472545624 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.4122808277606964, + "learning_rate": 2.9344261414026103e-05, + "loss": 0.2756, + "step": 8877, + "teacher_loss": 0.26036953926086426 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.37050020694732666, + "learning_rate": 2.9343597046957308e-05, + "loss": 0.2512, + "step": 8878, + "teacher_loss": 0.2379709780216217 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.45558881759643555, + "learning_rate": 2.934293235103284e-05, + "loss": 0.2352, + "step": 8879, + "teacher_loss": 0.2106691598892212 + }, + { + "compression_loss": 0.0, + "epoch": 1.6, + "label_loss": 0.357721209526062, + "learning_rate": 2.934226732626793e-05, + "loss": 0.2594, + "step": 8880, + "teacher_loss": 0.2484402060508728 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.38612765073776245, + "learning_rate": 2.9341601972677833e-05, + "loss": 0.2535, + "step": 8881, + "teacher_loss": 0.23880280554294586 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.1699083298444748, + "learning_rate": 2.9340936290277802e-05, + "loss": 0.1927, + "step": 8882, + "teacher_loss": 0.19521009922027588 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.7220228314399719, + "learning_rate": 2.93402702790831e-05, + "loss": 0.6361, + "step": 8883, + "teacher_loss": 0.6265135407447815 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.748738169670105, + "learning_rate": 2.9339603939108994e-05, + "loss": 0.4078, + "step": 8884, + "teacher_loss": 0.36989831924438477 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.512102484703064, + "learning_rate": 2.933893727037076e-05, + "loss": 0.3096, + "step": 8885, + "teacher_loss": 0.2870524823665619 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.18286707997322083, + "learning_rate": 2.9338270272883686e-05, + "loss": 0.1973, + "step": 8886, + "teacher_loss": 0.19886897504329681 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.5202264785766602, + "learning_rate": 2.9337602946663064e-05, + "loss": 0.26, + "step": 8887, + "teacher_loss": 0.23103934526443481 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.40252432227134705, + "learning_rate": 2.933693529172419e-05, + "loss": 0.2885, + "step": 8888, + "teacher_loss": 0.2757876217365265 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.5223032832145691, + "learning_rate": 2.9336267308082375e-05, + "loss": 0.2745, + "step": 8889, + "teacher_loss": 0.24700312316417694 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.4332813024520874, + "learning_rate": 2.9335598995752934e-05, + "loss": 0.1739, + "step": 8890, + "teacher_loss": 0.14508303999900818 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.7154121994972229, + "learning_rate": 2.933493035475119e-05, + "loss": 0.7203, + "step": 8891, + "teacher_loss": 0.7208747267723083 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.7543063759803772, + "learning_rate": 2.9334261385092472e-05, + "loss": 0.2971, + "step": 8892, + "teacher_loss": 0.24628636240959167 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.6075509190559387, + "learning_rate": 2.9333592086792113e-05, + "loss": 0.4389, + "step": 8893, + "teacher_loss": 0.4201599657535553 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.1301405131816864, + "learning_rate": 2.933292245986546e-05, + "loss": 0.2355, + "step": 8894, + "teacher_loss": 0.24723877012729645 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.446560263633728, + "learning_rate": 2.9332252504327875e-05, + "loss": 0.2835, + "step": 8895, + "teacher_loss": 0.26541048288345337 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.24469871819019318, + "learning_rate": 2.9331582220194705e-05, + "loss": 0.3367, + "step": 8896, + "teacher_loss": 0.346964955329895 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.24737313389778137, + "learning_rate": 2.9330911607481324e-05, + "loss": 0.2348, + "step": 8897, + "teacher_loss": 0.23338279128074646 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.497220516204834, + "learning_rate": 2.9330240666203104e-05, + "loss": 0.3217, + "step": 8898, + "teacher_loss": 0.3021583557128906 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.4921434819698334, + "learning_rate": 2.932956939637543e-05, + "loss": 0.3439, + "step": 8899, + "teacher_loss": 0.327475368976593 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.38183653354644775, + "learning_rate": 2.9328897798013695e-05, + "loss": 0.1723, + "step": 8900, + "teacher_loss": 0.1489918828010559 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.29097434878349304, + "learning_rate": 2.932822587113329e-05, + "loss": 0.1939, + "step": 8901, + "teacher_loss": 0.1830824315547943 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.26875928044319153, + "learning_rate": 2.932755361574963e-05, + "loss": 0.2025, + "step": 8902, + "teacher_loss": 0.19511710107326508 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.723259687423706, + "learning_rate": 2.932688103187812e-05, + "loss": 0.2979, + "step": 8903, + "teacher_loss": 0.25059446692466736 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.6912726759910583, + "learning_rate": 2.932620811953418e-05, + "loss": 0.3043, + "step": 8904, + "teacher_loss": 0.2613466680049896 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.2814003825187683, + "learning_rate": 2.932553487873324e-05, + "loss": 0.2199, + "step": 8905, + "teacher_loss": 0.21309661865234375 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.4984525144100189, + "learning_rate": 2.9324861309490736e-05, + "loss": 0.2367, + "step": 8906, + "teacher_loss": 0.2076694667339325 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.7079552412033081, + "learning_rate": 2.932418741182211e-05, + "loss": 0.3026, + "step": 8907, + "teacher_loss": 0.25761228799819946 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.30388885736465454, + "learning_rate": 2.9323513185742813e-05, + "loss": 0.2059, + "step": 8908, + "teacher_loss": 0.1949751079082489 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.3619818687438965, + "learning_rate": 2.9322838631268303e-05, + "loss": 0.1825, + "step": 8909, + "teacher_loss": 0.16257047653198242 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.2845340371131897, + "learning_rate": 2.9322163748414044e-05, + "loss": 0.2222, + "step": 8910, + "teacher_loss": 0.215323805809021 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.2997760474681854, + "learning_rate": 2.9321488537195513e-05, + "loss": 0.1715, + "step": 8911, + "teacher_loss": 0.15723916888237 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.6677510738372803, + "learning_rate": 2.9320812997628184e-05, + "loss": 0.3061, + "step": 8912, + "teacher_loss": 0.2659376859664917 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.5900148749351501, + "learning_rate": 2.9320137129727553e-05, + "loss": 0.3644, + "step": 8913, + "teacher_loss": 0.3392926752567291 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.7351362109184265, + "learning_rate": 2.931946093350911e-05, + "loss": 0.3577, + "step": 8914, + "teacher_loss": 0.31580817699432373 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.29090964794158936, + "learning_rate": 2.931878440898836e-05, + "loss": 0.1732, + "step": 8915, + "teacher_loss": 0.16013041138648987 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.2587795853614807, + "learning_rate": 2.9318107556180812e-05, + "loss": 0.218, + "step": 8916, + "teacher_loss": 0.21343928575515747 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.47493237257003784, + "learning_rate": 2.9317430375101985e-05, + "loss": 0.2281, + "step": 8917, + "teacher_loss": 0.20071488618850708 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.2877231538295746, + "learning_rate": 2.931675286576741e-05, + "loss": 0.1851, + "step": 8918, + "teacher_loss": 0.17369705438613892 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.3706057071685791, + "learning_rate": 2.931607502819261e-05, + "loss": 0.3822, + "step": 8919, + "teacher_loss": 0.3835172653198242 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.5336982011795044, + "learning_rate": 2.9315396862393137e-05, + "loss": 0.2459, + "step": 8920, + "teacher_loss": 0.21395309269428253 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 1.0213711261749268, + "learning_rate": 2.931471836838453e-05, + "loss": 0.447, + "step": 8921, + "teacher_loss": 0.3831454813480377 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.4956536293029785, + "learning_rate": 2.9314039546182353e-05, + "loss": 0.2365, + "step": 8922, + "teacher_loss": 0.20772519707679749 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.35153013467788696, + "learning_rate": 2.931336039580216e-05, + "loss": 0.3114, + "step": 8923, + "teacher_loss": 0.3069232106208801 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.8284546136856079, + "learning_rate": 2.9312680917259527e-05, + "loss": 0.3213, + "step": 8924, + "teacher_loss": 0.26498955488204956 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.38693302869796753, + "learning_rate": 2.9312001110570034e-05, + "loss": 0.4074, + "step": 8925, + "teacher_loss": 0.409656286239624 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.6033519506454468, + "learning_rate": 2.9311320975749263e-05, + "loss": 0.2808, + "step": 8926, + "teacher_loss": 0.24496988952159882 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.773811936378479, + "learning_rate": 2.931064051281281e-05, + "loss": 0.3445, + "step": 8927, + "teacher_loss": 0.2968422770500183 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.528587818145752, + "learning_rate": 2.930995972177628e-05, + "loss": 0.3317, + "step": 8928, + "teacher_loss": 0.30982518196105957 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.38173651695251465, + "learning_rate": 2.9309278602655272e-05, + "loss": 0.4563, + "step": 8929, + "teacher_loss": 0.46462106704711914 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.2777811288833618, + "learning_rate": 2.930859715546541e-05, + "loss": 0.2323, + "step": 8930, + "teacher_loss": 0.2272282838821411 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.22249789535999298, + "learning_rate": 2.930791538022231e-05, + "loss": 0.2225, + "step": 8931, + "teacher_loss": 0.2224869430065155 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.6952388286590576, + "learning_rate": 2.9307233276941613e-05, + "loss": 0.5399, + "step": 8932, + "teacher_loss": 0.5226446390151978 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.8608593940734863, + "learning_rate": 2.9306550845638953e-05, + "loss": 0.2905, + "step": 8933, + "teacher_loss": 0.227136492729187 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.42483726143836975, + "learning_rate": 2.930586808632997e-05, + "loss": 0.2453, + "step": 8934, + "teacher_loss": 0.2253992259502411 + }, + { + "compression_loss": 0.0, + "epoch": 1.61, + "label_loss": 0.44151902198791504, + "learning_rate": 2.9305184999030324e-05, + "loss": 0.3266, + "step": 8935, + "teacher_loss": 0.31384968757629395 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.16030415892601013, + "learning_rate": 2.930450158375568e-05, + "loss": 0.1363, + "step": 8936, + "teacher_loss": 0.13366684317588806 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.2851826548576355, + "learning_rate": 2.93038178405217e-05, + "loss": 0.2697, + "step": 8937, + "teacher_loss": 0.26794740557670593 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.6795215010643005, + "learning_rate": 2.930313376934406e-05, + "loss": 0.3172, + "step": 8938, + "teacher_loss": 0.27696341276168823 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.3784475326538086, + "learning_rate": 2.9302449370238447e-05, + "loss": 0.2441, + "step": 8939, + "teacher_loss": 0.22915582358837128 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.7547372579574585, + "learning_rate": 2.9301764643220553e-05, + "loss": 0.4154, + "step": 8940, + "teacher_loss": 0.37764161825180054 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.3426719009876251, + "learning_rate": 2.930107958830607e-05, + "loss": 0.3605, + "step": 8941, + "teacher_loss": 0.3624359965324402 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.767907977104187, + "learning_rate": 2.9300394205510713e-05, + "loss": 0.2562, + "step": 8942, + "teacher_loss": 0.19929450750350952 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.41128912568092346, + "learning_rate": 2.9299708494850185e-05, + "loss": 0.2139, + "step": 8943, + "teacher_loss": 0.19191959500312805 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.29214543104171753, + "learning_rate": 2.929902245634022e-05, + "loss": 0.2764, + "step": 8944, + "teacher_loss": 0.274662584066391 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.567003071308136, + "learning_rate": 2.9298336089996538e-05, + "loss": 0.3016, + "step": 8945, + "teacher_loss": 0.27211514115333557 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.25821882486343384, + "learning_rate": 2.929764939583488e-05, + "loss": 0.2658, + "step": 8946, + "teacher_loss": 0.26661422848701477 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.38896602392196655, + "learning_rate": 2.929696237387099e-05, + "loss": 0.2485, + "step": 8947, + "teacher_loss": 0.23284614086151123 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.4900261163711548, + "learning_rate": 2.9296275024120616e-05, + "loss": 0.2698, + "step": 8948, + "teacher_loss": 0.24528133869171143 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.7298829555511475, + "learning_rate": 2.9295587346599515e-05, + "loss": 0.4291, + "step": 8949, + "teacher_loss": 0.39565277099609375 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.4621315896511078, + "learning_rate": 2.9294899341323456e-05, + "loss": 0.2236, + "step": 8950, + "teacher_loss": 0.1970585584640503 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.2600078880786896, + "learning_rate": 2.929421100830822e-05, + "loss": 0.2147, + "step": 8951, + "teacher_loss": 0.20970812439918518 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.16284877061843872, + "learning_rate": 2.9293522347569575e-05, + "loss": 0.2355, + "step": 8952, + "teacher_loss": 0.24357135593891144 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.4467979669570923, + "learning_rate": 2.929283335912332e-05, + "loss": 0.3168, + "step": 8953, + "teacher_loss": 0.3023749887943268 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.5160260796546936, + "learning_rate": 2.929214404298525e-05, + "loss": 0.2287, + "step": 8954, + "teacher_loss": 0.196784108877182 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.19141361117362976, + "learning_rate": 2.929145439917116e-05, + "loss": 0.2554, + "step": 8955, + "teacher_loss": 0.2625039219856262 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.3893775939941406, + "learning_rate": 2.9290764427696875e-05, + "loss": 0.2684, + "step": 8956, + "teacher_loss": 0.2549407482147217 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.18428722023963928, + "learning_rate": 2.9290074128578207e-05, + "loss": 0.1884, + "step": 8957, + "teacher_loss": 0.18889513611793518 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 1.0659804344177246, + "learning_rate": 2.9289383501830983e-05, + "loss": 0.6758, + "step": 8958, + "teacher_loss": 0.6324576139450073 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.3802110254764557, + "learning_rate": 2.9288692547471036e-05, + "loss": 0.2067, + "step": 8959, + "teacher_loss": 0.18744316697120667 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.7226102948188782, + "learning_rate": 2.9288001265514205e-05, + "loss": 0.3208, + "step": 8960, + "teacher_loss": 0.2761363983154297 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.15415403246879578, + "learning_rate": 2.928730965597635e-05, + "loss": 0.2201, + "step": 8961, + "teacher_loss": 0.2274424135684967 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.3802204132080078, + "learning_rate": 2.9286617718873317e-05, + "loss": 0.2865, + "step": 8962, + "teacher_loss": 0.27606001496315 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.21149007976055145, + "learning_rate": 2.928592545422097e-05, + "loss": 0.2358, + "step": 8963, + "teacher_loss": 0.2385154664516449 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.6890534162521362, + "learning_rate": 2.9285232862035188e-05, + "loss": 0.2898, + "step": 8964, + "teacher_loss": 0.24545976519584656 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.46577465534210205, + "learning_rate": 2.9284539942331845e-05, + "loss": 0.3066, + "step": 8965, + "teacher_loss": 0.2888607978820801 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.8376703262329102, + "learning_rate": 2.9283846695126826e-05, + "loss": 0.6335, + "step": 8966, + "teacher_loss": 0.6108400821685791 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.6768379807472229, + "learning_rate": 2.928315312043603e-05, + "loss": 0.3933, + "step": 8967, + "teacher_loss": 0.36178117990493774 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.48513320088386536, + "learning_rate": 2.9282459218275357e-05, + "loss": 0.2553, + "step": 8968, + "teacher_loss": 0.22979308664798737 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.3122332692146301, + "learning_rate": 2.928176498866071e-05, + "loss": 0.2368, + "step": 8969, + "teacher_loss": 0.22839249670505524 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.23835495114326477, + "learning_rate": 2.9281070431608018e-05, + "loss": 0.1753, + "step": 8970, + "teacher_loss": 0.1682862937450409 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.27147623896598816, + "learning_rate": 2.928037554713319e-05, + "loss": 0.1924, + "step": 8971, + "teacher_loss": 0.18364593386650085 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.7178797721862793, + "learning_rate": 2.9279680335252173e-05, + "loss": 0.3122, + "step": 8972, + "teacher_loss": 0.26711034774780273 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.4710690379142761, + "learning_rate": 2.9278984795980898e-05, + "loss": 0.2885, + "step": 8973, + "teacher_loss": 0.2681998908519745 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.4934837818145752, + "learning_rate": 2.9278288929335308e-05, + "loss": 0.2292, + "step": 8974, + "teacher_loss": 0.1998106688261032 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.4627881646156311, + "learning_rate": 2.9277592735331362e-05, + "loss": 0.2327, + "step": 8975, + "teacher_loss": 0.20712760090827942 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.6366965174674988, + "learning_rate": 2.9276896213985022e-05, + "loss": 0.3038, + "step": 8976, + "teacher_loss": 0.2667694687843323 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.35123080015182495, + "learning_rate": 2.9276199365312252e-05, + "loss": 0.302, + "step": 8977, + "teacher_loss": 0.296527624130249 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.508888304233551, + "learning_rate": 2.927550218932904e-05, + "loss": 0.2136, + "step": 8978, + "teacher_loss": 0.18074581027030945 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.25468921661376953, + "learning_rate": 2.9274804686051358e-05, + "loss": 0.1636, + "step": 8979, + "teacher_loss": 0.15351982414722443 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.2571745812892914, + "learning_rate": 2.9274106855495207e-05, + "loss": 0.204, + "step": 8980, + "teacher_loss": 0.19806994497776031 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.4964093565940857, + "learning_rate": 2.9273408697676578e-05, + "loss": 0.2828, + "step": 8981, + "teacher_loss": 0.25908321142196655 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.5554239749908447, + "learning_rate": 2.9272710212611483e-05, + "loss": 0.3144, + "step": 8982, + "teacher_loss": 0.2876272201538086 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.38468390703201294, + "learning_rate": 2.9272011400315937e-05, + "loss": 0.2257, + "step": 8983, + "teacher_loss": 0.20807360112667084 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.415330708026886, + "learning_rate": 2.9271312260805952e-05, + "loss": 0.223, + "step": 8984, + "teacher_loss": 0.2016492486000061 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.6870024800300598, + "learning_rate": 2.9270612794097572e-05, + "loss": 0.3938, + "step": 8985, + "teacher_loss": 0.36121055483818054 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.2554101049900055, + "learning_rate": 2.9269913000206826e-05, + "loss": 0.1735, + "step": 8986, + "teacher_loss": 0.1644374430179596 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.3364235460758209, + "learning_rate": 2.9269212879149754e-05, + "loss": 0.2214, + "step": 8987, + "teacher_loss": 0.20866690576076508 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.6370645761489868, + "learning_rate": 2.9268512430942414e-05, + "loss": 0.3021, + "step": 8988, + "teacher_loss": 0.26488587260246277 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.32711923122406006, + "learning_rate": 2.926781165560086e-05, + "loss": 0.2781, + "step": 8989, + "teacher_loss": 0.2726441025733948 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.9321675300598145, + "learning_rate": 2.926711055314116e-05, + "loss": 0.5197, + "step": 8990, + "teacher_loss": 0.4738415479660034 + }, + { + "compression_loss": 0.0, + "epoch": 1.62, + "label_loss": 0.34943169355392456, + "learning_rate": 2.9266409123579403e-05, + "loss": 0.1656, + "step": 8991, + "teacher_loss": 0.14518259465694427 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.32197678089141846, + "learning_rate": 2.9265707366931643e-05, + "loss": 0.2938, + "step": 8992, + "teacher_loss": 0.2906179428100586 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 1.026017665863037, + "learning_rate": 2.9265005283213996e-05, + "loss": 0.7076, + "step": 8993, + "teacher_loss": 0.6722649335861206 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.4119094908237457, + "learning_rate": 2.9264302872442542e-05, + "loss": 0.2531, + "step": 8994, + "teacher_loss": 0.23541949689388275 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.37426674365997314, + "learning_rate": 2.9263600134633385e-05, + "loss": 0.2322, + "step": 8995, + "teacher_loss": 0.2164537012577057 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.2763417959213257, + "learning_rate": 2.9262897069802643e-05, + "loss": 0.2775, + "step": 8996, + "teacher_loss": 0.2775970697402954 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.5463225245475769, + "learning_rate": 2.926219367796644e-05, + "loss": 0.2551, + "step": 8997, + "teacher_loss": 0.222771555185318 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.3921516537666321, + "learning_rate": 2.926148995914089e-05, + "loss": 0.27, + "step": 8998, + "teacher_loss": 0.2564300298690796 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.6816219091415405, + "learning_rate": 2.9260785913342134e-05, + "loss": 0.4254, + "step": 8999, + "teacher_loss": 0.3969782888889313 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.13172490894794464, + "learning_rate": 2.9260081540586316e-05, + "loss": 0.152, + "step": 9000, + "teacher_loss": 0.1542419195175171 + }, + { + "epoch": 1.63, + "eval_exact_match": 79.49858088930937, + "eval_f1": 86.98887453329, + "step": 9000 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.2775450646877289, + "learning_rate": 2.9259376840889577e-05, + "loss": 0.2705, + "step": 9001, + "teacher_loss": 0.26972103118896484 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.5461021661758423, + "learning_rate": 2.9258671814268085e-05, + "loss": 0.2745, + "step": 9002, + "teacher_loss": 0.2442985326051712 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.6333494186401367, + "learning_rate": 2.9257966460737995e-05, + "loss": 0.343, + "step": 9003, + "teacher_loss": 0.31071579456329346 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.7762037515640259, + "learning_rate": 2.9257260780315485e-05, + "loss": 0.6891, + "step": 9004, + "teacher_loss": 0.6793792247772217 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.213931143283844, + "learning_rate": 2.9256554773016726e-05, + "loss": 0.2084, + "step": 9005, + "teacher_loss": 0.20781943202018738 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.45667171478271484, + "learning_rate": 2.9255848438857914e-05, + "loss": 0.2149, + "step": 9006, + "teacher_loss": 0.18809139728546143 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.5289179682731628, + "learning_rate": 2.9255141777855234e-05, + "loss": 0.2499, + "step": 9007, + "teacher_loss": 0.21894359588623047 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.2061433345079422, + "learning_rate": 2.9254434790024894e-05, + "loss": 0.1528, + "step": 9008, + "teacher_loss": 0.14688506722450256 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.12677393853664398, + "learning_rate": 2.92537274753831e-05, + "loss": 0.2171, + "step": 9009, + "teacher_loss": 0.22715824842453003 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.5902462005615234, + "learning_rate": 2.925301983394607e-05, + "loss": 0.2648, + "step": 9010, + "teacher_loss": 0.22865138947963715 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.30914685130119324, + "learning_rate": 2.9252311865730032e-05, + "loss": 0.2449, + "step": 9011, + "teacher_loss": 0.23771902918815613 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.23548491299152374, + "learning_rate": 2.9251603570751208e-05, + "loss": 0.3577, + "step": 9012, + "teacher_loss": 0.37123483419418335 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.5109137892723083, + "learning_rate": 2.9250894949025843e-05, + "loss": 0.3313, + "step": 9013, + "teacher_loss": 0.31133121252059937 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.42207154631614685, + "learning_rate": 2.925018600057019e-05, + "loss": 0.2055, + "step": 9014, + "teacher_loss": 0.1813814640045166 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.5506953001022339, + "learning_rate": 2.924947672540049e-05, + "loss": 0.2281, + "step": 9015, + "teacher_loss": 0.1923052966594696 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.4330574870109558, + "learning_rate": 2.9248767123533013e-05, + "loss": 0.2772, + "step": 9016, + "teacher_loss": 0.2598758041858673 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.5268689393997192, + "learning_rate": 2.9248057194984024e-05, + "loss": 0.444, + "step": 9017, + "teacher_loss": 0.43482697010040283 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.6163475513458252, + "learning_rate": 2.9247346939769803e-05, + "loss": 0.3993, + "step": 9018, + "teacher_loss": 0.3751649260520935 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.6395128965377808, + "learning_rate": 2.9246636357906634e-05, + "loss": 0.2627, + "step": 9019, + "teacher_loss": 0.22086858749389648 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.13514599204063416, + "learning_rate": 2.9245925449410802e-05, + "loss": 0.2006, + "step": 9020, + "teacher_loss": 0.20785696804523468 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.5912529230117798, + "learning_rate": 2.9245214214298618e-05, + "loss": 0.3054, + "step": 9021, + "teacher_loss": 0.2736865282058716 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.6193641424179077, + "learning_rate": 2.9244502652586376e-05, + "loss": 0.2885, + "step": 9022, + "teacher_loss": 0.25176793336868286 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.10610171407461166, + "learning_rate": 2.92437907642904e-05, + "loss": 0.2476, + "step": 9023, + "teacher_loss": 0.26327043771743774 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.4786786139011383, + "learning_rate": 2.9243078549427e-05, + "loss": 0.2771, + "step": 9024, + "teacher_loss": 0.2546723484992981 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.20772261917591095, + "learning_rate": 2.9242366008012515e-05, + "loss": 0.1914, + "step": 9025, + "teacher_loss": 0.1895974576473236 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.36129048466682434, + "learning_rate": 2.924165314006328e-05, + "loss": 0.313, + "step": 9026, + "teacher_loss": 0.30766934156417847 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.31653621792793274, + "learning_rate": 2.9240939945595635e-05, + "loss": 0.3129, + "step": 9027, + "teacher_loss": 0.31248581409454346 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.565127968788147, + "learning_rate": 2.9240226424625937e-05, + "loss": 0.4714, + "step": 9028, + "teacher_loss": 0.46099215745925903 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.332736074924469, + "learning_rate": 2.923951257717054e-05, + "loss": 0.2423, + "step": 9029, + "teacher_loss": 0.2322634607553482 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.33455342054367065, + "learning_rate": 2.9238798403245806e-05, + "loss": 0.2115, + "step": 9030, + "teacher_loss": 0.19782695174217224 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.364576131105423, + "learning_rate": 2.9238083902868123e-05, + "loss": 0.2352, + "step": 9031, + "teacher_loss": 0.22077983617782593 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.41065308451652527, + "learning_rate": 2.9237369076053863e-05, + "loss": 0.3668, + "step": 9032, + "teacher_loss": 0.3619014620780945 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.57323157787323, + "learning_rate": 2.9236653922819413e-05, + "loss": 0.3006, + "step": 9033, + "teacher_loss": 0.27034929394721985 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.31630322337150574, + "learning_rate": 2.9235938443181173e-05, + "loss": 0.2455, + "step": 9034, + "teacher_loss": 0.23765721917152405 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.46365129947662354, + "learning_rate": 2.9235222637155545e-05, + "loss": 0.264, + "step": 9035, + "teacher_loss": 0.24181796610355377 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.6756209135055542, + "learning_rate": 2.9234506504758947e-05, + "loss": 0.4013, + "step": 9036, + "teacher_loss": 0.37087541818618774 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.40011829137802124, + "learning_rate": 2.9233790046007788e-05, + "loss": 0.2508, + "step": 9037, + "teacher_loss": 0.23416680097579956 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.8036366105079651, + "learning_rate": 2.9233073260918497e-05, + "loss": 0.3005, + "step": 9038, + "teacher_loss": 0.24463282525539398 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.7782121300697327, + "learning_rate": 2.9232356149507506e-05, + "loss": 0.3367, + "step": 9039, + "teacher_loss": 0.28761789202690125 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.3888051509857178, + "learning_rate": 2.9231638711791266e-05, + "loss": 0.2375, + "step": 9040, + "teacher_loss": 0.22067710757255554 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.2764420807361603, + "learning_rate": 2.923092094778622e-05, + "loss": 0.2418, + "step": 9041, + "teacher_loss": 0.23798778653144836 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.295362651348114, + "learning_rate": 2.9230202857508816e-05, + "loss": 0.2099, + "step": 9042, + "teacher_loss": 0.20044925808906555 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.5992039442062378, + "learning_rate": 2.922948444097553e-05, + "loss": 0.3471, + "step": 9043, + "teacher_loss": 0.3191283941268921 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.28975823521614075, + "learning_rate": 2.9228765698202826e-05, + "loss": 0.1868, + "step": 9044, + "teacher_loss": 0.1753292679786682 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.3033132553100586, + "learning_rate": 2.922804662920718e-05, + "loss": 0.2184, + "step": 9045, + "teacher_loss": 0.20897182822227478 + }, + { + "compression_loss": 0.0, + "epoch": 1.63, + "label_loss": 0.5455037355422974, + "learning_rate": 2.922732723400509e-05, + "loss": 0.247, + "step": 9046, + "teacher_loss": 0.21379461884498596 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.3931177854537964, + "learning_rate": 2.9226607512613042e-05, + "loss": 0.3164, + "step": 9047, + "teacher_loss": 0.30787965655326843 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.9656020998954773, + "learning_rate": 2.9225887465047534e-05, + "loss": 0.4078, + "step": 9048, + "teacher_loss": 0.34587740898132324 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.412752628326416, + "learning_rate": 2.9225167091325082e-05, + "loss": 0.3278, + "step": 9049, + "teacher_loss": 0.31834664940834045 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.37509581446647644, + "learning_rate": 2.9224446391462192e-05, + "loss": 0.3387, + "step": 9050, + "teacher_loss": 0.3346675634384155 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.3306233286857605, + "learning_rate": 2.92237253654754e-05, + "loss": 0.2769, + "step": 9051, + "teacher_loss": 0.2709203362464905 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.38647693395614624, + "learning_rate": 2.9223004013381225e-05, + "loss": 0.269, + "step": 9052, + "teacher_loss": 0.2559296786785126 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.7728319764137268, + "learning_rate": 2.9222282335196213e-05, + "loss": 0.3472, + "step": 9053, + "teacher_loss": 0.29990658164024353 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.3392825126647949, + "learning_rate": 2.9221560330936906e-05, + "loss": 0.1719, + "step": 9054, + "teacher_loss": 0.1533445566892624 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.36017727851867676, + "learning_rate": 2.922083800061986e-05, + "loss": 0.2298, + "step": 9055, + "teacher_loss": 0.21535304188728333 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.2214267998933792, + "learning_rate": 2.9220115344261636e-05, + "loss": 0.1502, + "step": 9056, + "teacher_loss": 0.14225971698760986 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.2785983979701996, + "learning_rate": 2.92193923618788e-05, + "loss": 0.2625, + "step": 9057, + "teacher_loss": 0.26066938042640686 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.4297206997871399, + "learning_rate": 2.921866905348793e-05, + "loss": 0.3632, + "step": 9058, + "teacher_loss": 0.35582199692726135 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.43686607480049133, + "learning_rate": 2.9217945419105607e-05, + "loss": 0.3753, + "step": 9059, + "teacher_loss": 0.3685005009174347 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 1.0970160961151123, + "learning_rate": 2.9217221458748422e-05, + "loss": 0.3261, + "step": 9060, + "teacher_loss": 0.2404937446117401 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.6919724941253662, + "learning_rate": 2.9216497172432976e-05, + "loss": 0.3807, + "step": 9061, + "teacher_loss": 0.34614098072052 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.5340515971183777, + "learning_rate": 2.9215772560175877e-05, + "loss": 0.3082, + "step": 9062, + "teacher_loss": 0.2831352651119232 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 1.0165231227874756, + "learning_rate": 2.9215047621993728e-05, + "loss": 0.3067, + "step": 9063, + "teacher_loss": 0.22780652344226837 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.2056713104248047, + "learning_rate": 2.921432235790316e-05, + "loss": 0.2499, + "step": 9064, + "teacher_loss": 0.25481289625167847 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.45500198006629944, + "learning_rate": 2.9213596767920795e-05, + "loss": 0.3414, + "step": 9065, + "teacher_loss": 0.3287578821182251 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.7360314130783081, + "learning_rate": 2.9212870852063273e-05, + "loss": 0.3475, + "step": 9066, + "teacher_loss": 0.30433177947998047 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.27344343066215515, + "learning_rate": 2.921214461034723e-05, + "loss": 0.3741, + "step": 9067, + "teacher_loss": 0.38526418805122375 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.5669922828674316, + "learning_rate": 2.9211418042789325e-05, + "loss": 0.2746, + "step": 9068, + "teacher_loss": 0.2420617789030075 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.7302707433700562, + "learning_rate": 2.9210691149406214e-05, + "loss": 0.315, + "step": 9069, + "teacher_loss": 0.2688038945198059 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.9473024606704712, + "learning_rate": 2.920996393021456e-05, + "loss": 0.4028, + "step": 9070, + "teacher_loss": 0.3423303961753845 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.2851428985595703, + "learning_rate": 2.9209236385231035e-05, + "loss": 0.2689, + "step": 9071, + "teacher_loss": 0.26709654927253723 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.4193482995033264, + "learning_rate": 2.920850851447232e-05, + "loss": 0.3216, + "step": 9072, + "teacher_loss": 0.3106866478919983 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.2575805187225342, + "learning_rate": 2.9207780317955105e-05, + "loss": 0.3243, + "step": 9073, + "teacher_loss": 0.33174487948417664 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.8053159713745117, + "learning_rate": 2.9207051795696086e-05, + "loss": 0.5612, + "step": 9074, + "teacher_loss": 0.5340703725814819 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.5010508298873901, + "learning_rate": 2.9206322947711963e-05, + "loss": 0.2438, + "step": 9075, + "teacher_loss": 0.21521982550621033 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.6863592267036438, + "learning_rate": 2.9205593774019447e-05, + "loss": 0.3695, + "step": 9076, + "teacher_loss": 0.33430618047714233 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.2966417372226715, + "learning_rate": 2.920486427463526e-05, + "loss": 0.2333, + "step": 9077, + "teacher_loss": 0.22628529369831085 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.2085174024105072, + "learning_rate": 2.920413444957612e-05, + "loss": 0.2124, + "step": 9078, + "teacher_loss": 0.21279451251029968 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.7356836795806885, + "learning_rate": 2.9203404298858767e-05, + "loss": 0.2772, + "step": 9079, + "teacher_loss": 0.2262895703315735 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.18428349494934082, + "learning_rate": 2.9202673822499932e-05, + "loss": 0.2197, + "step": 9080, + "teacher_loss": 0.22368015348911285 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.44111815094947815, + "learning_rate": 2.920194302051637e-05, + "loss": 0.508, + "step": 9081, + "teacher_loss": 0.5154268741607666 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.18327198922634125, + "learning_rate": 2.9201211892924834e-05, + "loss": 0.2523, + "step": 9082, + "teacher_loss": 0.2599583566188812 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.1522093415260315, + "learning_rate": 2.9200480439742092e-05, + "loss": 0.1954, + "step": 9083, + "teacher_loss": 0.20016592741012573 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.6587107181549072, + "learning_rate": 2.91997486609849e-05, + "loss": 0.4255, + "step": 9084, + "teacher_loss": 0.3995497226715088 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.8274158239364624, + "learning_rate": 2.919901655667005e-05, + "loss": 0.3695, + "step": 9085, + "teacher_loss": 0.3186189532279968 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.5901550650596619, + "learning_rate": 2.9198284126814318e-05, + "loss": 0.274, + "step": 9086, + "teacher_loss": 0.23882898688316345 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.3918238878250122, + "learning_rate": 2.9197551371434504e-05, + "loss": 0.3561, + "step": 9087, + "teacher_loss": 0.35213425755500793 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.17997834086418152, + "learning_rate": 2.9196818290547402e-05, + "loss": 0.2195, + "step": 9088, + "teacher_loss": 0.22384393215179443 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.2517014443874359, + "learning_rate": 2.919608488416982e-05, + "loss": 0.2997, + "step": 9089, + "teacher_loss": 0.3050664961338043 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.14812707901000977, + "learning_rate": 2.919535115231857e-05, + "loss": 0.1585, + "step": 9090, + "teacher_loss": 0.15967410802841187 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.4033532738685608, + "learning_rate": 2.9194617095010483e-05, + "loss": 0.2675, + "step": 9091, + "teacher_loss": 0.25237852334976196 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.495887815952301, + "learning_rate": 2.9193882712262385e-05, + "loss": 0.2796, + "step": 9092, + "teacher_loss": 0.2555280923843384 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.557904839515686, + "learning_rate": 2.9193148004091106e-05, + "loss": 0.2991, + "step": 9093, + "teacher_loss": 0.2703282833099365 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.5590754151344299, + "learning_rate": 2.9192412970513503e-05, + "loss": 0.2339, + "step": 9094, + "teacher_loss": 0.19781169295310974 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.5037227272987366, + "learning_rate": 2.9191677611546418e-05, + "loss": 0.3516, + "step": 9095, + "teacher_loss": 0.3346906304359436 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.6285238265991211, + "learning_rate": 2.9190941927206714e-05, + "loss": 0.2826, + "step": 9096, + "teacher_loss": 0.24419990181922913 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.2855442464351654, + "learning_rate": 2.919020591751126e-05, + "loss": 0.1945, + "step": 9097, + "teacher_loss": 0.1844368427991867 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.5357562303543091, + "learning_rate": 2.9189469582476925e-05, + "loss": 0.3168, + "step": 9098, + "teacher_loss": 0.2925136387348175 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.4319174885749817, + "learning_rate": 2.9188732922120597e-05, + "loss": 0.2324, + "step": 9099, + "teacher_loss": 0.2102731615304947 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 1.1863384246826172, + "learning_rate": 2.9187995936459164e-05, + "loss": 0.4169, + "step": 9100, + "teacher_loss": 0.33135542273521423 + }, + { + "compression_loss": 0.0, + "epoch": 1.64, + "label_loss": 0.4632750451564789, + "learning_rate": 2.9187258625509518e-05, + "loss": 0.3712, + "step": 9101, + "teacher_loss": 0.36099544167518616 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.38158440589904785, + "learning_rate": 2.9186520989288574e-05, + "loss": 0.1905, + "step": 9102, + "teacher_loss": 0.1692637801170349 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.29963284730911255, + "learning_rate": 2.9185783027813233e-05, + "loss": 0.2409, + "step": 9103, + "teacher_loss": 0.23442545533180237 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.44087713956832886, + "learning_rate": 2.918504474110042e-05, + "loss": 0.3083, + "step": 9104, + "teacher_loss": 0.2935802936553955 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.5815075635910034, + "learning_rate": 2.918430612916706e-05, + "loss": 0.2272, + "step": 9105, + "teacher_loss": 0.18782316148281097 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.3396179676055908, + "learning_rate": 2.9183567192030087e-05, + "loss": 0.2595, + "step": 9106, + "teacher_loss": 0.25058799982070923 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.2538607120513916, + "learning_rate": 2.918282792970644e-05, + "loss": 0.1486, + "step": 9107, + "teacher_loss": 0.1368706226348877 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.6355059742927551, + "learning_rate": 2.9182088342213074e-05, + "loss": 0.3823, + "step": 9108, + "teacher_loss": 0.35412734746932983 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.6335360407829285, + "learning_rate": 2.9181348429566944e-05, + "loss": 0.2409, + "step": 9109, + "teacher_loss": 0.19730710983276367 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.14662745594978333, + "learning_rate": 2.9180608191785005e-05, + "loss": 0.2052, + "step": 9110, + "teacher_loss": 0.21166807413101196 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.293609082698822, + "learning_rate": 2.917986762888424e-05, + "loss": 0.2686, + "step": 9111, + "teacher_loss": 0.2658686935901642 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.8239504098892212, + "learning_rate": 2.9179126740881627e-05, + "loss": 0.3773, + "step": 9112, + "teacher_loss": 0.3276805281639099 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.2325119972229004, + "learning_rate": 2.9178385527794148e-05, + "loss": 0.2388, + "step": 9113, + "teacher_loss": 0.23952540755271912 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.7991150617599487, + "learning_rate": 2.9177643989638795e-05, + "loss": 0.2831, + "step": 9114, + "teacher_loss": 0.22571980953216553 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.40664488077163696, + "learning_rate": 2.9176902126432573e-05, + "loss": 0.2077, + "step": 9115, + "teacher_loss": 0.185613214969635 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.20584745705127716, + "learning_rate": 2.9176159938192488e-05, + "loss": 0.2834, + "step": 9116, + "teacher_loss": 0.29207220673561096 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.41206902265548706, + "learning_rate": 2.917541742493556e-05, + "loss": 0.2526, + "step": 9117, + "teacher_loss": 0.23488499224185944 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.550771176815033, + "learning_rate": 2.9174674586678817e-05, + "loss": 0.2626, + "step": 9118, + "teacher_loss": 0.23062163591384888 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.2395303100347519, + "learning_rate": 2.9173931423439275e-05, + "loss": 0.2238, + "step": 9119, + "teacher_loss": 0.2221044898033142 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.48667821288108826, + "learning_rate": 2.917318793523398e-05, + "loss": 0.2558, + "step": 9120, + "teacher_loss": 0.23009879887104034 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.49438217282295227, + "learning_rate": 2.9172444122079985e-05, + "loss": 0.3426, + "step": 9121, + "teacher_loss": 0.32576125860214233 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.33360904455184937, + "learning_rate": 2.9171699983994334e-05, + "loss": 0.238, + "step": 9122, + "teacher_loss": 0.22737759351730347 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.4617688059806824, + "learning_rate": 2.9170955520994095e-05, + "loss": 0.3006, + "step": 9123, + "teacher_loss": 0.28264304995536804 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.9464321136474609, + "learning_rate": 2.9170210733096328e-05, + "loss": 0.3181, + "step": 9124, + "teacher_loss": 0.24831557273864746 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.6949301958084106, + "learning_rate": 2.9169465620318114e-05, + "loss": 0.5848, + "step": 9125, + "teacher_loss": 0.5725321769714355 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.22288376092910767, + "learning_rate": 2.9168720182676542e-05, + "loss": 0.2984, + "step": 9126, + "teacher_loss": 0.30681461095809937 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 1.2912129163742065, + "learning_rate": 2.916797442018869e-05, + "loss": 0.3276, + "step": 9127, + "teacher_loss": 0.2204945832490921 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.4136827886104584, + "learning_rate": 2.9167228332871664e-05, + "loss": 0.2902, + "step": 9128, + "teacher_loss": 0.276501327753067 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.5210403800010681, + "learning_rate": 2.9166481920742565e-05, + "loss": 0.33, + "step": 9129, + "teacher_loss": 0.3087441921234131 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.9094825983047485, + "learning_rate": 2.916573518381851e-05, + "loss": 0.6093, + "step": 9130, + "teacher_loss": 0.5759446620941162 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.6826096773147583, + "learning_rate": 2.916498812211662e-05, + "loss": 0.3886, + "step": 9131, + "teacher_loss": 0.35588371753692627 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.820324182510376, + "learning_rate": 2.9164240735654024e-05, + "loss": 0.3699, + "step": 9132, + "teacher_loss": 0.3198615610599518 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.38956284523010254, + "learning_rate": 2.9163493024447848e-05, + "loss": 0.2129, + "step": 9133, + "teacher_loss": 0.19325563311576843 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.6421723961830139, + "learning_rate": 2.9162744988515244e-05, + "loss": 0.2735, + "step": 9134, + "teacher_loss": 0.23250806331634521 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.40422120690345764, + "learning_rate": 2.9161996627873362e-05, + "loss": 0.227, + "step": 9135, + "teacher_loss": 0.20733872056007385 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.47665566205978394, + "learning_rate": 2.9161247942539354e-05, + "loss": 0.464, + "step": 9136, + "teacher_loss": 0.46256759762763977 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.2436458170413971, + "learning_rate": 2.916049893253039e-05, + "loss": 0.2476, + "step": 9137, + "teacher_loss": 0.2479856312274933 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.5855885148048401, + "learning_rate": 2.9159749597863642e-05, + "loss": 0.2543, + "step": 9138, + "teacher_loss": 0.217438742518425 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.6897940635681152, + "learning_rate": 2.9158999938556287e-05, + "loss": 0.2787, + "step": 9139, + "teacher_loss": 0.23306520283222198 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.3864523768424988, + "learning_rate": 2.9158249954625514e-05, + "loss": 0.3303, + "step": 9140, + "teacher_loss": 0.3241148889064789 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.8880658745765686, + "learning_rate": 2.9157499646088516e-05, + "loss": 0.3219, + "step": 9141, + "teacher_loss": 0.2589852213859558 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.5536705255508423, + "learning_rate": 2.9156749012962503e-05, + "loss": 0.2034, + "step": 9142, + "teacher_loss": 0.16445383429527283 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.6476191282272339, + "learning_rate": 2.9155998055264676e-05, + "loss": 0.3762, + "step": 9143, + "teacher_loss": 0.34609195590019226 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.2706192135810852, + "learning_rate": 2.915524677301226e-05, + "loss": 0.1901, + "step": 9144, + "teacher_loss": 0.18110932409763336 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.4505302906036377, + "learning_rate": 2.9154495166222467e-05, + "loss": 0.2674, + "step": 9145, + "teacher_loss": 0.2470816969871521 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.4657009541988373, + "learning_rate": 2.9153743234912542e-05, + "loss": 0.3199, + "step": 9146, + "teacher_loss": 0.30373120307922363 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.5785669088363647, + "learning_rate": 2.9152990979099722e-05, + "loss": 0.3287, + "step": 9147, + "teacher_loss": 0.3009897768497467 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.5754903554916382, + "learning_rate": 2.9152238398801247e-05, + "loss": 0.2928, + "step": 9148, + "teacher_loss": 0.2613980174064636 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.41845351457595825, + "learning_rate": 2.9151485494034375e-05, + "loss": 0.2179, + "step": 9149, + "teacher_loss": 0.19566407799720764 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.9027433395385742, + "learning_rate": 2.9150732264816373e-05, + "loss": 0.2776, + "step": 9150, + "teacher_loss": 0.2081412672996521 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.29552245140075684, + "learning_rate": 2.9149978711164506e-05, + "loss": 0.2592, + "step": 9151, + "teacher_loss": 0.25517046451568604 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.3398802578449249, + "learning_rate": 2.9149224833096048e-05, + "loss": 0.2685, + "step": 9152, + "teacher_loss": 0.260617196559906 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.08707955479621887, + "learning_rate": 2.914847063062829e-05, + "loss": 0.1478, + "step": 9153, + "teacher_loss": 0.1544952392578125 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.29313355684280396, + "learning_rate": 2.914771610377852e-05, + "loss": 0.2187, + "step": 9154, + "teacher_loss": 0.210408017039299 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.5280560255050659, + "learning_rate": 2.9146961252564036e-05, + "loss": 0.3287, + "step": 9155, + "teacher_loss": 0.30655890703201294 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.5884580612182617, + "learning_rate": 2.9146206077002144e-05, + "loss": 0.3101, + "step": 9156, + "teacher_loss": 0.2792201638221741 + }, + { + "compression_loss": 0.0, + "epoch": 1.65, + "label_loss": 0.45135048031806946, + "learning_rate": 2.9145450577110158e-05, + "loss": 0.3613, + "step": 9157, + "teacher_loss": 0.3513393700122833 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.34703540802001953, + "learning_rate": 2.91446947529054e-05, + "loss": 0.1799, + "step": 9158, + "teacher_loss": 0.16128291189670563 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.28634852170944214, + "learning_rate": 2.9143938604405202e-05, + "loss": 0.2983, + "step": 9159, + "teacher_loss": 0.29964128136634827 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.1861371397972107, + "learning_rate": 2.9143182131626894e-05, + "loss": 0.2139, + "step": 9160, + "teacher_loss": 0.21699032187461853 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.43974313139915466, + "learning_rate": 2.9142425334587832e-05, + "loss": 0.2957, + "step": 9161, + "teacher_loss": 0.27972549200057983 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.37349262833595276, + "learning_rate": 2.914166821330535e-05, + "loss": 0.2139, + "step": 9162, + "teacher_loss": 0.19613327085971832 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.323818564414978, + "learning_rate": 2.9140910767796815e-05, + "loss": 0.2629, + "step": 9163, + "teacher_loss": 0.2561449408531189 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.32089412212371826, + "learning_rate": 2.9140152998079594e-05, + "loss": 0.407, + "step": 9164, + "teacher_loss": 0.41659343242645264 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.12852616608142853, + "learning_rate": 2.9139394904171064e-05, + "loss": 0.168, + "step": 9165, + "teacher_loss": 0.1724090278148651 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.2106059491634369, + "learning_rate": 2.9138636486088598e-05, + "loss": 0.1802, + "step": 9166, + "teacher_loss": 0.1767694503068924 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.9225035309791565, + "learning_rate": 2.9137877743849584e-05, + "loss": 0.3036, + "step": 9167, + "teacher_loss": 0.23487567901611328 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.3759137690067291, + "learning_rate": 2.9137118677471424e-05, + "loss": 0.2648, + "step": 9168, + "teacher_loss": 0.2524906396865845 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.6851135492324829, + "learning_rate": 2.9136359286971516e-05, + "loss": 0.2516, + "step": 9169, + "teacher_loss": 0.20341722667217255 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.35278797149658203, + "learning_rate": 2.913559957236727e-05, + "loss": 0.2856, + "step": 9170, + "teacher_loss": 0.2780858278274536 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.41789448261260986, + "learning_rate": 2.9134839533676115e-05, + "loss": 0.2301, + "step": 9171, + "teacher_loss": 0.20921780169010162 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.3479253351688385, + "learning_rate": 2.913407917091546e-05, + "loss": 0.3206, + "step": 9172, + "teacher_loss": 0.3175867199897766 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.5016261339187622, + "learning_rate": 2.913331848410275e-05, + "loss": 0.2401, + "step": 9173, + "teacher_loss": 0.2110644280910492 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.1500146985054016, + "learning_rate": 2.9132557473255423e-05, + "loss": 0.1356, + "step": 9174, + "teacher_loss": 0.13395142555236816 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.4810018539428711, + "learning_rate": 2.9131796138390916e-05, + "loss": 0.2436, + "step": 9175, + "teacher_loss": 0.21721789240837097 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.6518871188163757, + "learning_rate": 2.91310344795267e-05, + "loss": 0.583, + "step": 9176, + "teacher_loss": 0.5752996206283569 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.8711276054382324, + "learning_rate": 2.913027249668023e-05, + "loss": 0.3921, + "step": 9177, + "teacher_loss": 0.3388691544532776 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.3561475872993469, + "learning_rate": 2.9129510189868974e-05, + "loss": 0.5587, + "step": 9178, + "teacher_loss": 0.581181526184082 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.3938180208206177, + "learning_rate": 2.9128747559110412e-05, + "loss": 0.385, + "step": 9179, + "teacher_loss": 0.38401904702186584 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.3391727805137634, + "learning_rate": 2.9127984604422033e-05, + "loss": 0.2212, + "step": 9180, + "teacher_loss": 0.20806317031383514 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.46113285422325134, + "learning_rate": 2.9127221325821322e-05, + "loss": 0.3613, + "step": 9181, + "teacher_loss": 0.35018473863601685 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.519561231136322, + "learning_rate": 2.912645772332578e-05, + "loss": 0.3047, + "step": 9182, + "teacher_loss": 0.2808586359024048 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.7171036005020142, + "learning_rate": 2.9125693796952916e-05, + "loss": 0.4206, + "step": 9183, + "teacher_loss": 0.38763055205345154 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.17597715556621552, + "learning_rate": 2.9124929546720244e-05, + "loss": 0.2244, + "step": 9184, + "teacher_loss": 0.22979843616485596 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.3520622253417969, + "learning_rate": 2.912416497264529e-05, + "loss": 0.2158, + "step": 9185, + "teacher_loss": 0.20062606036663055 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.7940888404846191, + "learning_rate": 2.9123400074745575e-05, + "loss": 0.3042, + "step": 9186, + "teacher_loss": 0.24978914856910706 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.4371662139892578, + "learning_rate": 2.9122634853038642e-05, + "loss": 0.263, + "step": 9187, + "teacher_loss": 0.24369487166404724 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.4842766225337982, + "learning_rate": 2.9121869307542035e-05, + "loss": 0.1765, + "step": 9188, + "teacher_loss": 0.14227421581745148 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.6293430328369141, + "learning_rate": 2.9121103438273305e-05, + "loss": 0.3706, + "step": 9189, + "teacher_loss": 0.34190118312835693 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.8457003831863403, + "learning_rate": 2.912033724525001e-05, + "loss": 0.256, + "step": 9190, + "teacher_loss": 0.19047951698303223 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.5857725143432617, + "learning_rate": 2.9119570728489713e-05, + "loss": 0.4017, + "step": 9191, + "teacher_loss": 0.38119202852249146 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.617472767829895, + "learning_rate": 2.9118803888009994e-05, + "loss": 0.2826, + "step": 9192, + "teacher_loss": 0.24534976482391357 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.540016770362854, + "learning_rate": 2.9118036723828434e-05, + "loss": 0.4679, + "step": 9193, + "teacher_loss": 0.45985740423202515 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.6320154070854187, + "learning_rate": 2.9117269235962615e-05, + "loss": 0.3233, + "step": 9194, + "teacher_loss": 0.28901833295822144 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.7137870788574219, + "learning_rate": 2.9116501424430145e-05, + "loss": 0.3359, + "step": 9195, + "teacher_loss": 0.2938670814037323 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.5262972712516785, + "learning_rate": 2.9115733289248616e-05, + "loss": 0.2781, + "step": 9196, + "teacher_loss": 0.2505384385585785 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.24922776222229004, + "learning_rate": 2.9114964830435648e-05, + "loss": 0.1998, + "step": 9197, + "teacher_loss": 0.19428768754005432 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.3508811593055725, + "learning_rate": 2.911419604800885e-05, + "loss": 0.2061, + "step": 9198, + "teacher_loss": 0.18997475504875183 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.4926711916923523, + "learning_rate": 2.9113426941985858e-05, + "loss": 0.2751, + "step": 9199, + "teacher_loss": 0.250963032245636 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.2966512441635132, + "learning_rate": 2.9112657512384296e-05, + "loss": 0.1693, + "step": 9200, + "teacher_loss": 0.15516717731952667 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.14558416604995728, + "learning_rate": 2.9111887759221813e-05, + "loss": 0.157, + "step": 9201, + "teacher_loss": 0.15823706984519958 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.3428613543510437, + "learning_rate": 2.9111117682516055e-05, + "loss": 0.252, + "step": 9202, + "teacher_loss": 0.24191418290138245 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.42056864500045776, + "learning_rate": 2.9110347282284673e-05, + "loss": 0.2424, + "step": 9203, + "teacher_loss": 0.22260910272598267 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.11226047575473785, + "learning_rate": 2.910957655854533e-05, + "loss": 0.1434, + "step": 9204, + "teacher_loss": 0.14691483974456787 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.5073035359382629, + "learning_rate": 2.9108805511315706e-05, + "loss": 0.2177, + "step": 9205, + "teacher_loss": 0.18552683293819427 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.2442966103553772, + "learning_rate": 2.910803414061347e-05, + "loss": 0.1605, + "step": 9206, + "teacher_loss": 0.15113897621631622 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.1659238487482071, + "learning_rate": 2.9107262446456302e-05, + "loss": 0.2879, + "step": 9207, + "teacher_loss": 0.3014804720878601 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.6373984813690186, + "learning_rate": 2.9106490428861908e-05, + "loss": 0.2535, + "step": 9208, + "teacher_loss": 0.2108057737350464 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.26981639862060547, + "learning_rate": 2.9105718087847983e-05, + "loss": 0.2083, + "step": 9209, + "teacher_loss": 0.20146358013153076 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.7666878700256348, + "learning_rate": 2.910494542343223e-05, + "loss": 0.2924, + "step": 9210, + "teacher_loss": 0.23973098397254944 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.6488733291625977, + "learning_rate": 2.910417243563237e-05, + "loss": 0.3363, + "step": 9211, + "teacher_loss": 0.3016122877597809 + }, + { + "compression_loss": 0.0, + "epoch": 1.66, + "label_loss": 0.4720398783683777, + "learning_rate": 2.910339912446612e-05, + "loss": 0.2593, + "step": 9212, + "teacher_loss": 0.23561860620975494 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.32900285720825195, + "learning_rate": 2.9102625489951212e-05, + "loss": 0.2193, + "step": 9213, + "teacher_loss": 0.20714855194091797 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.15560680627822876, + "learning_rate": 2.910185153210539e-05, + "loss": 0.205, + "step": 9214, + "teacher_loss": 0.21053211390972137 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.4644492268562317, + "learning_rate": 2.9101077250946388e-05, + "loss": 0.3256, + "step": 9215, + "teacher_loss": 0.31014496088027954 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.38424551486968994, + "learning_rate": 2.9100302646491953e-05, + "loss": 0.2388, + "step": 9216, + "teacher_loss": 0.2226463258266449 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.23442524671554565, + "learning_rate": 2.9099527718759865e-05, + "loss": 0.1849, + "step": 9217, + "teacher_loss": 0.17943312227725983 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.5474934577941895, + "learning_rate": 2.9098752467767876e-05, + "loss": 0.2807, + "step": 9218, + "teacher_loss": 0.2510283291339874 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.43334537744522095, + "learning_rate": 2.909797689353376e-05, + "loss": 0.2001, + "step": 9219, + "teacher_loss": 0.17415004968643188 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.38292402029037476, + "learning_rate": 2.9097200996075303e-05, + "loss": 0.2521, + "step": 9220, + "teacher_loss": 0.2376130223274231 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.3658839166164398, + "learning_rate": 2.9096424775410295e-05, + "loss": 0.2305, + "step": 9221, + "teacher_loss": 0.21548418700695038 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.28370532393455505, + "learning_rate": 2.9095648231556525e-05, + "loss": 0.1966, + "step": 9222, + "teacher_loss": 0.18696634471416473 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.5835572481155396, + "learning_rate": 2.909487136453181e-05, + "loss": 0.4144, + "step": 9223, + "teacher_loss": 0.3955951929092407 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.5398017764091492, + "learning_rate": 2.909409417435395e-05, + "loss": 0.2965, + "step": 9224, + "teacher_loss": 0.2695165276527405 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.6575205326080322, + "learning_rate": 2.9093316661040763e-05, + "loss": 0.3395, + "step": 9225, + "teacher_loss": 0.3041561245918274 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.5120516419410706, + "learning_rate": 2.9092538824610085e-05, + "loss": 0.2579, + "step": 9226, + "teacher_loss": 0.22961165010929108 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.4826245605945587, + "learning_rate": 2.909176066507974e-05, + "loss": 0.2436, + "step": 9227, + "teacher_loss": 0.21709519624710083 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.48871737718582153, + "learning_rate": 2.909098218246757e-05, + "loss": 0.3039, + "step": 9228, + "teacher_loss": 0.283365398645401 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.9395196437835693, + "learning_rate": 2.909020337679143e-05, + "loss": 0.3322, + "step": 9229, + "teacher_loss": 0.26474103331565857 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.4654906690120697, + "learning_rate": 2.908942424806917e-05, + "loss": 0.2329, + "step": 9230, + "teacher_loss": 0.20709139108657837 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.3360847234725952, + "learning_rate": 2.9088644796318654e-05, + "loss": 0.5773, + "step": 9231, + "teacher_loss": 0.6040756106376648 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.3834913671016693, + "learning_rate": 2.908786502155775e-05, + "loss": 0.2069, + "step": 9232, + "teacher_loss": 0.18724675476551056 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.5472903251647949, + "learning_rate": 2.9087084923804342e-05, + "loss": 0.2869, + "step": 9233, + "teacher_loss": 0.2579546272754669 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.5131911039352417, + "learning_rate": 2.9086304503076307e-05, + "loss": 0.6983, + "step": 9234, + "teacher_loss": 0.7188769578933716 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 1.0257718563079834, + "learning_rate": 2.9085523759391547e-05, + "loss": 0.3589, + "step": 9235, + "teacher_loss": 0.2847823202610016 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.27047398686408997, + "learning_rate": 2.908474269276796e-05, + "loss": 0.1863, + "step": 9236, + "teacher_loss": 0.17691370844841003 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.6767044067382812, + "learning_rate": 2.9083961303223444e-05, + "loss": 0.3408, + "step": 9237, + "teacher_loss": 0.30342957377433777 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.22198769450187683, + "learning_rate": 2.9083179590775928e-05, + "loss": 0.2749, + "step": 9238, + "teacher_loss": 0.2807576060295105 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.2379516363143921, + "learning_rate": 2.908239755544333e-05, + "loss": 0.2751, + "step": 9239, + "teacher_loss": 0.2791757583618164 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.5076822638511658, + "learning_rate": 2.908161519724357e-05, + "loss": 0.3569, + "step": 9240, + "teacher_loss": 0.34017837047576904 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.5089656114578247, + "learning_rate": 2.9080832516194596e-05, + "loss": 0.3203, + "step": 9241, + "teacher_loss": 0.2993620038032532 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.36689141392707825, + "learning_rate": 2.9080049512314343e-05, + "loss": 0.2392, + "step": 9242, + "teacher_loss": 0.22506119310855865 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.7230166792869568, + "learning_rate": 2.9079266185620774e-05, + "loss": 0.2927, + "step": 9243, + "teacher_loss": 0.24490296840667725 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.2775919735431671, + "learning_rate": 2.907848253613185e-05, + "loss": 0.2635, + "step": 9244, + "teacher_loss": 0.26190197467803955 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.5631516575813293, + "learning_rate": 2.907769856386552e-05, + "loss": 0.3146, + "step": 9245, + "teacher_loss": 0.2869395911693573 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.36183035373687744, + "learning_rate": 2.907691426883977e-05, + "loss": 0.251, + "step": 9246, + "teacher_loss": 0.2386607825756073 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.3816518187522888, + "learning_rate": 2.9076129651072587e-05, + "loss": 0.207, + "step": 9247, + "teacher_loss": 0.18758907914161682 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.6186832189559937, + "learning_rate": 2.907534471058195e-05, + "loss": 0.2848, + "step": 9248, + "teacher_loss": 0.247691810131073 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.7001530528068542, + "learning_rate": 2.907455944738586e-05, + "loss": 0.4486, + "step": 9249, + "teacher_loss": 0.4206329882144928 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.36669573187828064, + "learning_rate": 2.9073773861502315e-05, + "loss": 0.3121, + "step": 9250, + "teacher_loss": 0.3059922456741333 + }, + { + "epoch": 1.67, + "eval_exact_match": 78.98770104068117, + "eval_f1": 86.61258879189994, + "step": 9250 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.9323701858520508, + "learning_rate": 2.9072987952949337e-05, + "loss": 0.5258, + "step": 9251, + "teacher_loss": 0.4806171953678131 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.1984187662601471, + "learning_rate": 2.9072201721744935e-05, + "loss": 0.3295, + "step": 9252, + "teacher_loss": 0.34404256939888 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.38764792680740356, + "learning_rate": 2.9071415167907136e-05, + "loss": 0.1964, + "step": 9253, + "teacher_loss": 0.17520058155059814 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.4380306303501129, + "learning_rate": 2.907062829145398e-05, + "loss": 0.3262, + "step": 9254, + "teacher_loss": 0.3137598931789398 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.8820582628250122, + "learning_rate": 2.90698410924035e-05, + "loss": 0.2866, + "step": 9255, + "teacher_loss": 0.22039452195167542 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.7891812324523926, + "learning_rate": 2.9069053570773752e-05, + "loss": 0.3336, + "step": 9256, + "teacher_loss": 0.2829534113407135 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.4762847423553467, + "learning_rate": 2.906826572658278e-05, + "loss": 0.1818, + "step": 9257, + "teacher_loss": 0.14908862113952637 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.5055019855499268, + "learning_rate": 2.9067477559848655e-05, + "loss": 0.2694, + "step": 9258, + "teacher_loss": 0.24312575161457062 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.6891862750053406, + "learning_rate": 2.9066689070589453e-05, + "loss": 0.4136, + "step": 9259, + "teacher_loss": 0.3829249143600464 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.4040907025337219, + "learning_rate": 2.906590025882324e-05, + "loss": 0.2256, + "step": 9260, + "teacher_loss": 0.2057361751794815 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.7869569659233093, + "learning_rate": 2.9065111124568105e-05, + "loss": 0.5716, + "step": 9261, + "teacher_loss": 0.5477244853973389 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.9112817049026489, + "learning_rate": 2.906432166784214e-05, + "loss": 0.4773, + "step": 9262, + "teacher_loss": 0.429031103849411 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.5311615467071533, + "learning_rate": 2.906353188866345e-05, + "loss": 0.2391, + "step": 9263, + "teacher_loss": 0.20668652653694153 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.5544610023498535, + "learning_rate": 2.9062741787050134e-05, + "loss": 0.2283, + "step": 9264, + "teacher_loss": 0.19200721383094788 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.35473915934562683, + "learning_rate": 2.9061951363020317e-05, + "loss": 0.2168, + "step": 9265, + "teacher_loss": 0.2015204131603241 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.37907469272613525, + "learning_rate": 2.906116061659211e-05, + "loss": 0.2686, + "step": 9266, + "teacher_loss": 0.2563498318195343 + }, + { + "compression_loss": 0.0, + "epoch": 1.67, + "label_loss": 0.7442792057991028, + "learning_rate": 2.9060369547783653e-05, + "loss": 0.3597, + "step": 9267, + "teacher_loss": 0.3169637620449066 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.4627285301685333, + "learning_rate": 2.9059578156613074e-05, + "loss": 0.2441, + "step": 9268, + "teacher_loss": 0.21980109810829163 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.09949947148561478, + "learning_rate": 2.905878644309852e-05, + "loss": 0.1786, + "step": 9269, + "teacher_loss": 0.1874256730079651 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.4641808867454529, + "learning_rate": 2.9057994407258143e-05, + "loss": 0.2933, + "step": 9270, + "teacher_loss": 0.2743515968322754 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.5340152382850647, + "learning_rate": 2.9057202049110105e-05, + "loss": 0.2742, + "step": 9271, + "teacher_loss": 0.2453407645225525 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.6270548105239868, + "learning_rate": 2.9056409368672564e-05, + "loss": 0.3086, + "step": 9272, + "teacher_loss": 0.2731887102127075 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.20683591067790985, + "learning_rate": 2.9055616365963706e-05, + "loss": 0.1758, + "step": 9273, + "teacher_loss": 0.17235158383846283 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.5508305430412292, + "learning_rate": 2.9054823041001705e-05, + "loss": 0.2708, + "step": 9274, + "teacher_loss": 0.2396995574235916 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.44754481315612793, + "learning_rate": 2.905402939380475e-05, + "loss": 0.4151, + "step": 9275, + "teacher_loss": 0.41147178411483765 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.43648675084114075, + "learning_rate": 2.9053235424391032e-05, + "loss": 0.324, + "step": 9276, + "teacher_loss": 0.31154996156692505 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.3455158770084381, + "learning_rate": 2.9052441132778766e-05, + "loss": 0.259, + "step": 9277, + "teacher_loss": 0.24937212467193604 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.3073540925979614, + "learning_rate": 2.9051646518986154e-05, + "loss": 0.3141, + "step": 9278, + "teacher_loss": 0.314807653427124 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.48548024892807007, + "learning_rate": 2.905085158303141e-05, + "loss": 0.257, + "step": 9279, + "teacher_loss": 0.2316540777683258 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.320823073387146, + "learning_rate": 2.9050056324932773e-05, + "loss": 0.2213, + "step": 9280, + "teacher_loss": 0.2102929800748825 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.5867105722427368, + "learning_rate": 2.904926074470847e-05, + "loss": 0.2623, + "step": 9281, + "teacher_loss": 0.22629155218601227 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.5472455620765686, + "learning_rate": 2.9048464842376736e-05, + "loss": 0.3346, + "step": 9282, + "teacher_loss": 0.3110141158103943 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.3994990587234497, + "learning_rate": 2.904766861795582e-05, + "loss": 0.3554, + "step": 9283, + "teacher_loss": 0.3504706621170044 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.6690493226051331, + "learning_rate": 2.9046872071463988e-05, + "loss": 0.3163, + "step": 9284, + "teacher_loss": 0.27708274126052856 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.5111331343650818, + "learning_rate": 2.9046075202919487e-05, + "loss": 0.2483, + "step": 9285, + "teacher_loss": 0.219122514128685 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.5614480376243591, + "learning_rate": 2.9045278012340593e-05, + "loss": 0.2893, + "step": 9286, + "teacher_loss": 0.25906965136528015 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.19893768429756165, + "learning_rate": 2.904448049974559e-05, + "loss": 0.1982, + "step": 9287, + "teacher_loss": 0.19816677272319794 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.5493618249893188, + "learning_rate": 2.9043682665152756e-05, + "loss": 0.2332, + "step": 9288, + "teacher_loss": 0.19810637831687927 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.3882234990596771, + "learning_rate": 2.9042884508580383e-05, + "loss": 0.2459, + "step": 9289, + "teacher_loss": 0.2301032692193985 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.3313745856285095, + "learning_rate": 2.904208603004677e-05, + "loss": 0.2239, + "step": 9290, + "teacher_loss": 0.2119961827993393 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.33346739411354065, + "learning_rate": 2.9041287229570222e-05, + "loss": 0.352, + "step": 9291, + "teacher_loss": 0.3541010618209839 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.5447283387184143, + "learning_rate": 2.904048810716906e-05, + "loss": 0.2614, + "step": 9292, + "teacher_loss": 0.22986991703510284 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.5627639293670654, + "learning_rate": 2.90396886628616e-05, + "loss": 0.3035, + "step": 9293, + "teacher_loss": 0.27467185258865356 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.14594316482543945, + "learning_rate": 2.9038888896666172e-05, + "loss": 0.1764, + "step": 9294, + "teacher_loss": 0.17978979647159576 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.6003696918487549, + "learning_rate": 2.9038088808601114e-05, + "loss": 0.2908, + "step": 9295, + "teacher_loss": 0.25641506910324097 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.4198756217956543, + "learning_rate": 2.9037288398684762e-05, + "loss": 0.3045, + "step": 9296, + "teacher_loss": 0.2916848659515381 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.2558099627494812, + "learning_rate": 2.903648766693548e-05, + "loss": 0.2528, + "step": 9297, + "teacher_loss": 0.2524198889732361 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.45294368267059326, + "learning_rate": 2.9035686613371616e-05, + "loss": 0.3183, + "step": 9298, + "teacher_loss": 0.3033705949783325 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.7846300601959229, + "learning_rate": 2.9034885238011543e-05, + "loss": 0.4943, + "step": 9299, + "teacher_loss": 0.46204277873039246 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.4580080211162567, + "learning_rate": 2.9034083540873624e-05, + "loss": 0.2925, + "step": 9300, + "teacher_loss": 0.2740847170352936 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.23906934261322021, + "learning_rate": 2.9033281521976248e-05, + "loss": 0.2443, + "step": 9301, + "teacher_loss": 0.24485427141189575 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.7126936912536621, + "learning_rate": 2.9032479181337804e-05, + "loss": 0.4218, + "step": 9302, + "teacher_loss": 0.38953304290771484 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.42940568923950195, + "learning_rate": 2.903167651897668e-05, + "loss": 0.1962, + "step": 9303, + "teacher_loss": 0.17027972638607025 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.5111311674118042, + "learning_rate": 2.903087353491128e-05, + "loss": 0.2643, + "step": 9304, + "teacher_loss": 0.23687729239463806 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.4613502025604248, + "learning_rate": 2.903007022916002e-05, + "loss": 0.3221, + "step": 9305, + "teacher_loss": 0.3065764904022217 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.6822640895843506, + "learning_rate": 2.9029266601741317e-05, + "loss": 0.6515, + "step": 9306, + "teacher_loss": 0.6480617523193359 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.8899911642074585, + "learning_rate": 2.9028462652673588e-05, + "loss": 0.4088, + "step": 9307, + "teacher_loss": 0.35533642768859863 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.4414692521095276, + "learning_rate": 2.9027658381975273e-05, + "loss": 0.2003, + "step": 9308, + "teacher_loss": 0.17351919412612915 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.2900228202342987, + "learning_rate": 2.9026853789664808e-05, + "loss": 0.236, + "step": 9309, + "teacher_loss": 0.22996850311756134 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.7469189167022705, + "learning_rate": 2.902604887576064e-05, + "loss": 0.474, + "step": 9310, + "teacher_loss": 0.4436472952365875 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.40438586473464966, + "learning_rate": 2.9025243640281226e-05, + "loss": 0.2464, + "step": 9311, + "teacher_loss": 0.22882801294326782 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.3126750588417053, + "learning_rate": 2.9024438083245023e-05, + "loss": 0.2262, + "step": 9312, + "teacher_loss": 0.2165645956993103 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.8984996676445007, + "learning_rate": 2.9023632204670497e-05, + "loss": 0.3609, + "step": 9313, + "teacher_loss": 0.3011930584907532 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.9566714763641357, + "learning_rate": 2.9022826004576137e-05, + "loss": 0.3335, + "step": 9314, + "teacher_loss": 0.26422858238220215 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.6724084615707397, + "learning_rate": 2.9022019482980416e-05, + "loss": 0.56, + "step": 9315, + "teacher_loss": 0.547517716884613 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.2508290708065033, + "learning_rate": 2.902121263990183e-05, + "loss": 0.3716, + "step": 9316, + "teacher_loss": 0.3849754333496094 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.42668718099594116, + "learning_rate": 2.9020405475358872e-05, + "loss": 0.2073, + "step": 9317, + "teacher_loss": 0.1829012930393219 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.48074281215667725, + "learning_rate": 2.9019597989370055e-05, + "loss": 0.302, + "step": 9318, + "teacher_loss": 0.2821105122566223 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.22151502966880798, + "learning_rate": 2.9018790181953892e-05, + "loss": 0.2293, + "step": 9319, + "teacher_loss": 0.2301289588212967 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.2705234885215759, + "learning_rate": 2.9017982053128895e-05, + "loss": 0.2583, + "step": 9320, + "teacher_loss": 0.2569606900215149 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.31468263268470764, + "learning_rate": 2.90171736029136e-05, + "loss": 0.2213, + "step": 9321, + "teacher_loss": 0.21087901294231415 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.40045833587646484, + "learning_rate": 2.9016364831326538e-05, + "loss": 0.2242, + "step": 9322, + "teacher_loss": 0.20466187596321106 + }, + { + "compression_loss": 0.0, + "epoch": 1.68, + "label_loss": 0.28022196888923645, + "learning_rate": 2.9015555738386258e-05, + "loss": 0.191, + "step": 9323, + "teacher_loss": 0.18113242089748383 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.1975923627614975, + "learning_rate": 2.90147463241113e-05, + "loss": 0.214, + "step": 9324, + "teacher_loss": 0.21580807864665985 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.5414063334465027, + "learning_rate": 2.9013936588520235e-05, + "loss": 0.2507, + "step": 9325, + "teacher_loss": 0.218429297208786 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.6998558640480042, + "learning_rate": 2.9013126531631616e-05, + "loss": 0.2906, + "step": 9326, + "teacher_loss": 0.24511653184890747 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.8140338659286499, + "learning_rate": 2.9012316153464017e-05, + "loss": 0.333, + "step": 9327, + "teacher_loss": 0.27954739332199097 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 1.1546919345855713, + "learning_rate": 2.901150545403602e-05, + "loss": 0.4454, + "step": 9328, + "teacher_loss": 0.36658209562301636 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.4790791869163513, + "learning_rate": 2.9010694433366213e-05, + "loss": 0.3509, + "step": 9329, + "teacher_loss": 0.3366243839263916 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.3284538984298706, + "learning_rate": 2.900988309147319e-05, + "loss": 0.1535, + "step": 9330, + "teacher_loss": 0.1340821385383606 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.6214761137962341, + "learning_rate": 2.900907142837555e-05, + "loss": 0.3882, + "step": 9331, + "teacher_loss": 0.3623168170452118 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.2939201593399048, + "learning_rate": 2.9008259444091906e-05, + "loss": 0.3834, + "step": 9332, + "teacher_loss": 0.39338138699531555 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.09034471213817596, + "learning_rate": 2.900744713864087e-05, + "loss": 0.196, + "step": 9333, + "teacher_loss": 0.2077823281288147 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.4003854990005493, + "learning_rate": 2.9006634512041067e-05, + "loss": 0.192, + "step": 9334, + "teacher_loss": 0.16889601945877075 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.41204673051834106, + "learning_rate": 2.9005821564311127e-05, + "loss": 0.271, + "step": 9335, + "teacher_loss": 0.2553806006908417 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.577776312828064, + "learning_rate": 2.9005008295469696e-05, + "loss": 0.3016, + "step": 9336, + "teacher_loss": 0.2709653675556183 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.5871777534484863, + "learning_rate": 2.9004194705535405e-05, + "loss": 0.3534, + "step": 9337, + "teacher_loss": 0.3273926377296448 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.34036558866500854, + "learning_rate": 2.9003380794526926e-05, + "loss": 0.1979, + "step": 9338, + "teacher_loss": 0.18208590149879456 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.28699785470962524, + "learning_rate": 2.90025665624629e-05, + "loss": 0.3359, + "step": 9339, + "teacher_loss": 0.34132862091064453 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.2708887755870819, + "learning_rate": 2.9001752009362014e-05, + "loss": 0.1937, + "step": 9340, + "teacher_loss": 0.1851045787334442 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.263755738735199, + "learning_rate": 2.9000937135242932e-05, + "loss": 0.2355, + "step": 9341, + "teacher_loss": 0.23238103091716766 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.22057479619979858, + "learning_rate": 2.9000121940124333e-05, + "loss": 0.1736, + "step": 9342, + "teacher_loss": 0.16837728023529053 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.6109610199928284, + "learning_rate": 2.8999306424024915e-05, + "loss": 0.4234, + "step": 9343, + "teacher_loss": 0.4026148021221161 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.5595384836196899, + "learning_rate": 2.8998490586963373e-05, + "loss": 0.237, + "step": 9344, + "teacher_loss": 0.20113666355609894 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.6873195171356201, + "learning_rate": 2.899767442895841e-05, + "loss": 0.4059, + "step": 9345, + "teacher_loss": 0.37465405464172363 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.4221692681312561, + "learning_rate": 2.8996857950028744e-05, + "loss": 0.2917, + "step": 9346, + "teacher_loss": 0.2772250175476074 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.4763318598270416, + "learning_rate": 2.8996041150193084e-05, + "loss": 0.2319, + "step": 9347, + "teacher_loss": 0.20479440689086914 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.34323960542678833, + "learning_rate": 2.899522402947017e-05, + "loss": 0.1863, + "step": 9348, + "teacher_loss": 0.1688569039106369 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.5284562110900879, + "learning_rate": 2.8994406587878722e-05, + "loss": 0.2413, + "step": 9349, + "teacher_loss": 0.20940876007080078 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.4097611904144287, + "learning_rate": 2.8993588825437493e-05, + "loss": 0.3131, + "step": 9350, + "teacher_loss": 0.30235347151756287 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.2623571455478668, + "learning_rate": 2.8992770742165226e-05, + "loss": 0.2217, + "step": 9351, + "teacher_loss": 0.21721863746643066 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.3416444659233093, + "learning_rate": 2.8991952338080677e-05, + "loss": 0.2405, + "step": 9352, + "teacher_loss": 0.229261577129364 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.7279170751571655, + "learning_rate": 2.8991133613202615e-05, + "loss": 0.4087, + "step": 9353, + "teacher_loss": 0.3732330799102783 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.5482463836669922, + "learning_rate": 2.89903145675498e-05, + "loss": 0.4332, + "step": 9354, + "teacher_loss": 0.4204011559486389 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.37647539377212524, + "learning_rate": 2.898949520114102e-05, + "loss": 0.3056, + "step": 9355, + "teacher_loss": 0.29775065183639526 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.6451793313026428, + "learning_rate": 2.898867551399506e-05, + "loss": 0.2722, + "step": 9356, + "teacher_loss": 0.23076388239860535 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.5896247029304504, + "learning_rate": 2.8987855506130708e-05, + "loss": 0.3018, + "step": 9357, + "teacher_loss": 0.2698572278022766 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.22080765664577484, + "learning_rate": 2.8987035177566767e-05, + "loss": 0.2162, + "step": 9358, + "teacher_loss": 0.21566995978355408 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.3908550441265106, + "learning_rate": 2.8986214528322044e-05, + "loss": 0.2698, + "step": 9359, + "teacher_loss": 0.25636622309684753 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.6212261319160461, + "learning_rate": 2.8985393558415364e-05, + "loss": 0.2096, + "step": 9360, + "teacher_loss": 0.16388392448425293 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.2948904037475586, + "learning_rate": 2.8984572267865527e-05, + "loss": 0.1651, + "step": 9361, + "teacher_loss": 0.15062521398067474 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.7019200325012207, + "learning_rate": 2.8983750656691385e-05, + "loss": 0.2572, + "step": 9362, + "teacher_loss": 0.20776838064193726 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.4429117441177368, + "learning_rate": 2.8982928724911762e-05, + "loss": 0.3066, + "step": 9363, + "teacher_loss": 0.29147735238075256 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.4424898624420166, + "learning_rate": 2.8982106472545507e-05, + "loss": 0.1802, + "step": 9364, + "teacher_loss": 0.15107297897338867 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.6375385522842407, + "learning_rate": 2.898128389961147e-05, + "loss": 0.4571, + "step": 9365, + "teacher_loss": 0.4371042251586914 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.6873155832290649, + "learning_rate": 2.8980461006128512e-05, + "loss": 0.3902, + "step": 9366, + "teacher_loss": 0.3571946620941162 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.6286658048629761, + "learning_rate": 2.89796377921155e-05, + "loss": 0.3648, + "step": 9367, + "teacher_loss": 0.3354980945587158 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.4112156629562378, + "learning_rate": 2.8978814257591304e-05, + "loss": 0.2148, + "step": 9368, + "teacher_loss": 0.19303107261657715 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.8636107444763184, + "learning_rate": 2.897799040257481e-05, + "loss": 0.5014, + "step": 9369, + "teacher_loss": 0.46116888523101807 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.46559083461761475, + "learning_rate": 2.8977166227084903e-05, + "loss": 0.2432, + "step": 9370, + "teacher_loss": 0.21844364702701569 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.318211168050766, + "learning_rate": 2.8976341731140478e-05, + "loss": 0.4443, + "step": 9371, + "teacher_loss": 0.458340048789978 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.22801688313484192, + "learning_rate": 2.8975516914760442e-05, + "loss": 0.1996, + "step": 9372, + "teacher_loss": 0.19641321897506714 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.33171647787094116, + "learning_rate": 2.8974691777963707e-05, + "loss": 0.2859, + "step": 9373, + "teacher_loss": 0.2807769477367401 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.16290518641471863, + "learning_rate": 2.8973866320769186e-05, + "loss": 0.2152, + "step": 9374, + "teacher_loss": 0.22104278206825256 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.9149197340011597, + "learning_rate": 2.8973040543195803e-05, + "loss": 0.3312, + "step": 9375, + "teacher_loss": 0.2663334012031555 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.6135046482086182, + "learning_rate": 2.8972214445262498e-05, + "loss": 0.3627, + "step": 9376, + "teacher_loss": 0.33479487895965576 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.8560186624526978, + "learning_rate": 2.8971388026988203e-05, + "loss": 0.3623, + "step": 9377, + "teacher_loss": 0.30742496252059937 + }, + { + "compression_loss": 0.0, + "epoch": 1.69, + "label_loss": 0.490791916847229, + "learning_rate": 2.8970561288391872e-05, + "loss": 0.4867, + "step": 9378, + "teacher_loss": 0.48624151945114136 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.5349369049072266, + "learning_rate": 2.8969734229492455e-05, + "loss": 0.2826, + "step": 9379, + "teacher_loss": 0.25461798906326294 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.395302414894104, + "learning_rate": 2.8968906850308918e-05, + "loss": 0.2534, + "step": 9380, + "teacher_loss": 0.2376861721277237 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.2770238518714905, + "learning_rate": 2.8968079150860228e-05, + "loss": 0.2242, + "step": 9381, + "teacher_loss": 0.21831226348876953 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.6028745174407959, + "learning_rate": 2.8967251131165358e-05, + "loss": 0.3468, + "step": 9382, + "teacher_loss": 0.31834447383880615 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.6734985113143921, + "learning_rate": 2.8966422791243296e-05, + "loss": 0.2643, + "step": 9383, + "teacher_loss": 0.2188381850719452 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.4535084664821625, + "learning_rate": 2.8965594131113036e-05, + "loss": 0.1933, + "step": 9384, + "teacher_loss": 0.16440461575984955 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.718681812286377, + "learning_rate": 2.896476515079357e-05, + "loss": 0.3814, + "step": 9385, + "teacher_loss": 0.34392493963241577 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.5419446229934692, + "learning_rate": 2.8963935850303905e-05, + "loss": 0.3539, + "step": 9386, + "teacher_loss": 0.333030641078949 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.2921307682991028, + "learning_rate": 2.8963106229663064e-05, + "loss": 0.3177, + "step": 9387, + "teacher_loss": 0.3205293118953705 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.5811346769332886, + "learning_rate": 2.896227628889006e-05, + "loss": 0.3763, + "step": 9388, + "teacher_loss": 0.3535630404949188 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.22378309071063995, + "learning_rate": 2.8961446028003918e-05, + "loss": 0.198, + "step": 9389, + "teacher_loss": 0.19511735439300537 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.19910335540771484, + "learning_rate": 2.896061544702368e-05, + "loss": 0.1917, + "step": 9390, + "teacher_loss": 0.19091831147670746 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.8366166353225708, + "learning_rate": 2.8959784545968384e-05, + "loss": 0.5431, + "step": 9391, + "teacher_loss": 0.5104920268058777 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.2722296416759491, + "learning_rate": 2.895895332485708e-05, + "loss": 0.2188, + "step": 9392, + "teacher_loss": 0.21286682784557343 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.43733450770378113, + "learning_rate": 2.8958121783708832e-05, + "loss": 0.2274, + "step": 9393, + "teacher_loss": 0.20403876900672913 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.30129778385162354, + "learning_rate": 2.89572899225427e-05, + "loss": 0.2402, + "step": 9394, + "teacher_loss": 0.23338648676872253 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.19690775871276855, + "learning_rate": 2.8956457741377753e-05, + "loss": 0.226, + "step": 9395, + "teacher_loss": 0.22919979691505432 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.18240909278392792, + "learning_rate": 2.895562524023308e-05, + "loss": 0.1905, + "step": 9396, + "teacher_loss": 0.19138428568840027 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.35369741916656494, + "learning_rate": 2.8954792419127753e-05, + "loss": 0.3409, + "step": 9397, + "teacher_loss": 0.3394266366958618 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.5802950263023376, + "learning_rate": 2.8953959278080875e-05, + "loss": 0.309, + "step": 9398, + "teacher_loss": 0.2788654565811157 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.4208073616027832, + "learning_rate": 2.8953125817111553e-05, + "loss": 0.3893, + "step": 9399, + "teacher_loss": 0.3858261704444885 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.41641998291015625, + "learning_rate": 2.8952292036238884e-05, + "loss": 0.2528, + "step": 9400, + "teacher_loss": 0.23463964462280273 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.4171334505081177, + "learning_rate": 2.8951457935481992e-05, + "loss": 0.3333, + "step": 9401, + "teacher_loss": 0.32397592067718506 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.8566931486129761, + "learning_rate": 2.8950623514859998e-05, + "loss": 0.2589, + "step": 9402, + "teacher_loss": 0.19252698123455048 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.42989400029182434, + "learning_rate": 2.894978877439203e-05, + "loss": 0.3076, + "step": 9403, + "teacher_loss": 0.2939668893814087 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.31020134687423706, + "learning_rate": 2.8948953714097237e-05, + "loss": 0.2101, + "step": 9404, + "teacher_loss": 0.1990264356136322 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.4747995138168335, + "learning_rate": 2.8948118333994748e-05, + "loss": 0.3029, + "step": 9405, + "teacher_loss": 0.2838355302810669 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.22408822178840637, + "learning_rate": 2.8947282634103732e-05, + "loss": 0.1645, + "step": 9406, + "teacher_loss": 0.15789492428302765 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.5716903209686279, + "learning_rate": 2.8946446614443335e-05, + "loss": 0.3312, + "step": 9407, + "teacher_loss": 0.30449479818344116 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.14879626035690308, + "learning_rate": 2.8945610275032733e-05, + "loss": 0.175, + "step": 9408, + "teacher_loss": 0.17791524529457092 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.7454357147216797, + "learning_rate": 2.8944773615891097e-05, + "loss": 0.3948, + "step": 9409, + "teacher_loss": 0.3558773994445801 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 1.1078578233718872, + "learning_rate": 2.8943936637037614e-05, + "loss": 0.5531, + "step": 9410, + "teacher_loss": 0.49150386452674866 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.35919398069381714, + "learning_rate": 2.894309933849147e-05, + "loss": 0.3492, + "step": 9411, + "teacher_loss": 0.3481142818927765 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.2908899188041687, + "learning_rate": 2.894226172027186e-05, + "loss": 0.1726, + "step": 9412, + "teacher_loss": 0.15947763621807098 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.6169778108596802, + "learning_rate": 2.8941423782397987e-05, + "loss": 0.2787, + "step": 9413, + "teacher_loss": 0.24109011888504028 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.4549195170402527, + "learning_rate": 2.894058552488907e-05, + "loss": 0.4342, + "step": 9414, + "teacher_loss": 0.43191346526145935 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.2947240471839905, + "learning_rate": 2.8939746947764317e-05, + "loss": 0.2585, + "step": 9415, + "teacher_loss": 0.25447508692741394 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.371695876121521, + "learning_rate": 2.8938908051042965e-05, + "loss": 0.2558, + "step": 9416, + "teacher_loss": 0.24297265708446503 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.547249436378479, + "learning_rate": 2.8938068834744243e-05, + "loss": 0.3321, + "step": 9417, + "teacher_loss": 0.3082178831100464 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.522907555103302, + "learning_rate": 2.8937229298887387e-05, + "loss": 0.2138, + "step": 9418, + "teacher_loss": 0.17947739362716675 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.5087186098098755, + "learning_rate": 2.8936389443491655e-05, + "loss": 0.2777, + "step": 9419, + "teacher_loss": 0.2520167827606201 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 1.0342810153961182, + "learning_rate": 2.8935549268576294e-05, + "loss": 0.4388, + "step": 9420, + "teacher_loss": 0.37267762422561646 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.35705363750457764, + "learning_rate": 2.8934708774160566e-05, + "loss": 0.2072, + "step": 9421, + "teacher_loss": 0.1905396580696106 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.4151454269886017, + "learning_rate": 2.8933867960263746e-05, + "loss": 0.278, + "step": 9422, + "teacher_loss": 0.26272812485694885 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.32695525884628296, + "learning_rate": 2.893302682690511e-05, + "loss": 0.205, + "step": 9423, + "teacher_loss": 0.19142332673072815 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.38215094804763794, + "learning_rate": 2.8932185374103946e-05, + "loss": 0.2767, + "step": 9424, + "teacher_loss": 0.2649814784526825 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.13310247659683228, + "learning_rate": 2.8931343601879535e-05, + "loss": 0.2512, + "step": 9425, + "teacher_loss": 0.26430368423461914 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.8965725898742676, + "learning_rate": 2.8930501510251187e-05, + "loss": 0.4264, + "step": 9426, + "teacher_loss": 0.37413665652275085 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.4012833535671234, + "learning_rate": 2.8929659099238207e-05, + "loss": 0.2343, + "step": 9427, + "teacher_loss": 0.21578149497509003 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.2685379981994629, + "learning_rate": 2.8928816368859904e-05, + "loss": 0.2122, + "step": 9428, + "teacher_loss": 0.20594459772109985 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.3332647383213043, + "learning_rate": 2.8927973319135605e-05, + "loss": 0.2259, + "step": 9429, + "teacher_loss": 0.21392974257469177 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.5013045072555542, + "learning_rate": 2.892712995008463e-05, + "loss": 0.3194, + "step": 9430, + "teacher_loss": 0.2992364168167114 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.24649418890476227, + "learning_rate": 2.892628626172633e-05, + "loss": 0.2402, + "step": 9431, + "teacher_loss": 0.2394467443227768 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.38722261786460876, + "learning_rate": 2.8925442254080036e-05, + "loss": 0.3463, + "step": 9432, + "teacher_loss": 0.34170621633529663 + }, + { + "compression_loss": 0.0, + "epoch": 1.7, + "label_loss": 0.3369593620300293, + "learning_rate": 2.89245979271651e-05, + "loss": 0.1657, + "step": 9433, + "teacher_loss": 0.14661921560764313 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.3820837736129761, + "learning_rate": 2.8923753281000884e-05, + "loss": 0.2047, + "step": 9434, + "teacher_loss": 0.18502630293369293 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.5342718362808228, + "learning_rate": 2.892290831560675e-05, + "loss": 0.3058, + "step": 9435, + "teacher_loss": 0.28038448095321655 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.36089733242988586, + "learning_rate": 2.8922063031002067e-05, + "loss": 0.1716, + "step": 9436, + "teacher_loss": 0.15051597356796265 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.1848730891942978, + "learning_rate": 2.8921217427206227e-05, + "loss": 0.3084, + "step": 9437, + "teacher_loss": 0.3221457004547119 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.5581762790679932, + "learning_rate": 2.89203715042386e-05, + "loss": 0.3999, + "step": 9438, + "teacher_loss": 0.3822594881057739 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.6545029878616333, + "learning_rate": 2.8919525262118596e-05, + "loss": 0.2324, + "step": 9439, + "teacher_loss": 0.1855495274066925 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.6595101952552795, + "learning_rate": 2.8918678700865613e-05, + "loss": 0.3825, + "step": 9440, + "teacher_loss": 0.35174164175987244 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.3666520118713379, + "learning_rate": 2.8917831820499055e-05, + "loss": 0.2493, + "step": 9441, + "teacher_loss": 0.236236110329628 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.5610173940658569, + "learning_rate": 2.8916984621038337e-05, + "loss": 0.2606, + "step": 9442, + "teacher_loss": 0.22720874845981598 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.6348270177841187, + "learning_rate": 2.891613710250289e-05, + "loss": 0.3233, + "step": 9443, + "teacher_loss": 0.28865164518356323 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.2001899778842926, + "learning_rate": 2.8915289264912143e-05, + "loss": 0.2293, + "step": 9444, + "teacher_loss": 0.2325005829334259 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.48045435547828674, + "learning_rate": 2.8914441108285533e-05, + "loss": 0.2276, + "step": 9445, + "teacher_loss": 0.19946104288101196 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.5375640988349915, + "learning_rate": 2.8913592632642507e-05, + "loss": 0.2781, + "step": 9446, + "teacher_loss": 0.24926477670669556 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.25137245655059814, + "learning_rate": 2.891274383800251e-05, + "loss": 0.2296, + "step": 9447, + "teacher_loss": 0.22719906270503998 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.6816695928573608, + "learning_rate": 2.8911894724385023e-05, + "loss": 0.3403, + "step": 9448, + "teacher_loss": 0.30235934257507324 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.723246693611145, + "learning_rate": 2.891104529180949e-05, + "loss": 0.3455, + "step": 9449, + "teacher_loss": 0.3035595715045929 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.2292892336845398, + "learning_rate": 2.8910195540295398e-05, + "loss": 0.2591, + "step": 9450, + "teacher_loss": 0.2623632550239563 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.42785611748695374, + "learning_rate": 2.8909345469862228e-05, + "loss": 0.4496, + "step": 9451, + "teacher_loss": 0.45201438665390015 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.2561454772949219, + "learning_rate": 2.8908495080529468e-05, + "loss": 0.265, + "step": 9452, + "teacher_loss": 0.26600703597068787 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.28792378306388855, + "learning_rate": 2.8907644372316616e-05, + "loss": 0.2421, + "step": 9453, + "teacher_loss": 0.23696283996105194 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.3974774479866028, + "learning_rate": 2.8906793345243175e-05, + "loss": 0.2509, + "step": 9454, + "teacher_loss": 0.2346021682024002 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.32158780097961426, + "learning_rate": 2.8905941999328657e-05, + "loss": 0.1863, + "step": 9455, + "teacher_loss": 0.17122483253479004 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.38021501898765564, + "learning_rate": 2.8905090334592585e-05, + "loss": 0.1803, + "step": 9456, + "teacher_loss": 0.1580427587032318 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.7404710054397583, + "learning_rate": 2.890423835105448e-05, + "loss": 0.4427, + "step": 9457, + "teacher_loss": 0.4096405506134033 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 1.1754100322723389, + "learning_rate": 2.890338604873387e-05, + "loss": 0.903, + "step": 9458, + "teacher_loss": 0.8727257251739502 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.5227789282798767, + "learning_rate": 2.890253342765031e-05, + "loss": 0.2685, + "step": 9459, + "teacher_loss": 0.24024814367294312 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.4979461431503296, + "learning_rate": 2.8901680487823337e-05, + "loss": 0.2761, + "step": 9460, + "teacher_loss": 0.25143519043922424 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.5062993168830872, + "learning_rate": 2.890082722927251e-05, + "loss": 0.3372, + "step": 9461, + "teacher_loss": 0.3184327483177185 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.34944581985473633, + "learning_rate": 2.889997365201739e-05, + "loss": 0.316, + "step": 9462, + "teacher_loss": 0.3122739791870117 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.29811620712280273, + "learning_rate": 2.8899119756077558e-05, + "loss": 0.2164, + "step": 9463, + "teacher_loss": 0.20732466876506805 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.3391658067703247, + "learning_rate": 2.8898265541472573e-05, + "loss": 0.1876, + "step": 9464, + "teacher_loss": 0.1708112359046936 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.29614150524139404, + "learning_rate": 2.8897411008222026e-05, + "loss": 0.3214, + "step": 9465, + "teacher_loss": 0.32418137788772583 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.27397704124450684, + "learning_rate": 2.8896556156345518e-05, + "loss": 0.2577, + "step": 9466, + "teacher_loss": 0.2559289336204529 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.7105743288993835, + "learning_rate": 2.889570098586264e-05, + "loss": 0.3178, + "step": 9467, + "teacher_loss": 0.27412912249565125 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.37822291254997253, + "learning_rate": 2.8894845496792995e-05, + "loss": 0.2762, + "step": 9468, + "teacher_loss": 0.2648215889930725 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.5535728931427002, + "learning_rate": 2.8893989689156202e-05, + "loss": 0.3037, + "step": 9469, + "teacher_loss": 0.2759411931037903 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.1917286515235901, + "learning_rate": 2.8893133562971886e-05, + "loss": 0.2832, + "step": 9470, + "teacher_loss": 0.2933412194252014 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.3179375231266022, + "learning_rate": 2.889227711825967e-05, + "loss": 0.2917, + "step": 9471, + "teacher_loss": 0.2887392044067383 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.4145300090312958, + "learning_rate": 2.8891420355039193e-05, + "loss": 0.2825, + "step": 9472, + "teacher_loss": 0.26777493953704834 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.35148924589157104, + "learning_rate": 2.889056327333009e-05, + "loss": 0.1884, + "step": 9473, + "teacher_loss": 0.1702927201986313 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.5350561738014221, + "learning_rate": 2.8889705873152024e-05, + "loss": 0.319, + "step": 9474, + "teacher_loss": 0.29494708776474 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.3603099584579468, + "learning_rate": 2.8888848154524642e-05, + "loss": 0.3167, + "step": 9475, + "teacher_loss": 0.3118588328361511 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.5691217184066772, + "learning_rate": 2.888799011746761e-05, + "loss": 0.2854, + "step": 9476, + "teacher_loss": 0.25384092330932617 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.4562886953353882, + "learning_rate": 2.8887131762000605e-05, + "loss": 0.2421, + "step": 9477, + "teacher_loss": 0.21830147504806519 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.666765034198761, + "learning_rate": 2.888627308814331e-05, + "loss": 0.3631, + "step": 9478, + "teacher_loss": 0.329380601644516 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.5053094029426575, + "learning_rate": 2.88854140959154e-05, + "loss": 0.4681, + "step": 9479, + "teacher_loss": 0.4640064537525177 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.6324402093887329, + "learning_rate": 2.8884554785336577e-05, + "loss": 0.278, + "step": 9480, + "teacher_loss": 0.23859205842018127 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.47667816281318665, + "learning_rate": 2.8883695156426544e-05, + "loss": 0.2634, + "step": 9481, + "teacher_loss": 0.2396690398454666 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.1979219764471054, + "learning_rate": 2.8882835209205e-05, + "loss": 0.2505, + "step": 9482, + "teacher_loss": 0.25639188289642334 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.5739365816116333, + "learning_rate": 2.8881974943691676e-05, + "loss": 0.2762, + "step": 9483, + "teacher_loss": 0.24312105774879456 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.642431378364563, + "learning_rate": 2.8881114359906283e-05, + "loss": 0.2915, + "step": 9484, + "teacher_loss": 0.2524959444999695 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.35984623432159424, + "learning_rate": 2.8880253457868552e-05, + "loss": 0.2223, + "step": 9485, + "teacher_loss": 0.20703119039535522 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.5318686366081238, + "learning_rate": 2.8879392237598225e-05, + "loss": 0.2296, + "step": 9486, + "teacher_loss": 0.1960306167602539 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.40393245220184326, + "learning_rate": 2.8878530699115052e-05, + "loss": 0.2633, + "step": 9487, + "teacher_loss": 0.24766096472740173 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.060222674161195755, + "learning_rate": 2.8877668842438775e-05, + "loss": 0.1607, + "step": 9488, + "teacher_loss": 0.17185035347938538 + }, + { + "compression_loss": 0.0, + "epoch": 1.71, + "label_loss": 0.5454633235931396, + "learning_rate": 2.887680666758916e-05, + "loss": 0.2958, + "step": 9489, + "teacher_loss": 0.2680458724498749 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.5859564542770386, + "learning_rate": 2.8875944174585972e-05, + "loss": 0.3076, + "step": 9490, + "teacher_loss": 0.2766672372817993 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.20699137449264526, + "learning_rate": 2.887508136344899e-05, + "loss": 0.2317, + "step": 9491, + "teacher_loss": 0.23446646332740784 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 1.1290473937988281, + "learning_rate": 2.887421823419799e-05, + "loss": 0.8173, + "step": 9492, + "teacher_loss": 0.782699465751648 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.15283741056919098, + "learning_rate": 2.8873354786852762e-05, + "loss": 0.179, + "step": 9493, + "teacher_loss": 0.18190765380859375 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.35447508096694946, + "learning_rate": 2.8872491021433105e-05, + "loss": 0.3241, + "step": 9494, + "teacher_loss": 0.320762574672699 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.590571403503418, + "learning_rate": 2.8871626937958818e-05, + "loss": 0.2874, + "step": 9495, + "teacher_loss": 0.2536930441856384 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.6005941033363342, + "learning_rate": 2.8870762536449714e-05, + "loss": 0.307, + "step": 9496, + "teacher_loss": 0.27433955669403076 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.3539612591266632, + "learning_rate": 2.8869897816925616e-05, + "loss": 0.2314, + "step": 9497, + "teacher_loss": 0.21774816513061523 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.6836268305778503, + "learning_rate": 2.8869032779406343e-05, + "loss": 0.4109, + "step": 9498, + "teacher_loss": 0.3806215524673462 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.44198447465896606, + "learning_rate": 2.8868167423911732e-05, + "loss": 0.2016, + "step": 9499, + "teacher_loss": 0.1748543083667755 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.3481265902519226, + "learning_rate": 2.8867301750461616e-05, + "loss": 0.1744, + "step": 9500, + "teacher_loss": 0.1551232486963272 + }, + { + "epoch": 1.72, + "eval_exact_match": 79.40397350993378, + "eval_f1": 86.6925610959982, + "step": 9500 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.7204965949058533, + "learning_rate": 2.886643575907585e-05, + "loss": 0.3576, + "step": 9501, + "teacher_loss": 0.31725066900253296 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.38294681906700134, + "learning_rate": 2.8865569449774287e-05, + "loss": 0.2467, + "step": 9502, + "teacher_loss": 0.23151350021362305 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.6223882436752319, + "learning_rate": 2.886470282257679e-05, + "loss": 0.3173, + "step": 9503, + "teacher_loss": 0.2833506166934967 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.3250850737094879, + "learning_rate": 2.8863835877503218e-05, + "loss": 0.2244, + "step": 9504, + "teacher_loss": 0.21319669485092163 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.47057026624679565, + "learning_rate": 2.8862968614573463e-05, + "loss": 0.282, + "step": 9505, + "teacher_loss": 0.261068731546402 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.5378087759017944, + "learning_rate": 2.8862101033807398e-05, + "loss": 0.3104, + "step": 9506, + "teacher_loss": 0.2851082682609558 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.8102796077728271, + "learning_rate": 2.8861233135224915e-05, + "loss": 0.2784, + "step": 9507, + "teacher_loss": 0.21929779648780823 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.17647671699523926, + "learning_rate": 2.8860364918845916e-05, + "loss": 0.1774, + "step": 9508, + "teacher_loss": 0.17752380669116974 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.9248626232147217, + "learning_rate": 2.8859496384690306e-05, + "loss": 0.3532, + "step": 9509, + "teacher_loss": 0.2897075414657593 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.7064296007156372, + "learning_rate": 2.8858627532777993e-05, + "loss": 0.2722, + "step": 9510, + "teacher_loss": 0.2239123433828354 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.7216962575912476, + "learning_rate": 2.8857758363128908e-05, + "loss": 0.4304, + "step": 9511, + "teacher_loss": 0.39804044365882874 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.1719408929347992, + "learning_rate": 2.8856888875762965e-05, + "loss": 0.2071, + "step": 9512, + "teacher_loss": 0.21104782819747925 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.3332878053188324, + "learning_rate": 2.8856019070700107e-05, + "loss": 0.2821, + "step": 9513, + "teacher_loss": 0.2764579653739929 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.7348049283027649, + "learning_rate": 2.8855148947960273e-05, + "loss": 0.3929, + "step": 9514, + "teacher_loss": 0.354946494102478 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.2960263192653656, + "learning_rate": 2.885427850756342e-05, + "loss": 0.2396, + "step": 9515, + "teacher_loss": 0.23337848484516144 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.6953973770141602, + "learning_rate": 2.8853407749529487e-05, + "loss": 0.2476, + "step": 9516, + "teacher_loss": 0.19779713451862335 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.548407793045044, + "learning_rate": 2.8852536673878458e-05, + "loss": 0.3938, + "step": 9517, + "teacher_loss": 0.3766377568244934 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.2611948847770691, + "learning_rate": 2.8851665280630294e-05, + "loss": 0.2295, + "step": 9518, + "teacher_loss": 0.22601871192455292 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.25542911887168884, + "learning_rate": 2.8850793569804968e-05, + "loss": 0.1832, + "step": 9519, + "teacher_loss": 0.17520998418331146 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.29920563101768494, + "learning_rate": 2.8849921541422478e-05, + "loss": 0.2003, + "step": 9520, + "teacher_loss": 0.18930459022521973 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.5232792496681213, + "learning_rate": 2.8849049195502812e-05, + "loss": 0.3403, + "step": 9521, + "teacher_loss": 0.32001471519470215 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.2496078610420227, + "learning_rate": 2.884817653206597e-05, + "loss": 0.1706, + "step": 9522, + "teacher_loss": 0.161823570728302 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.4752728343009949, + "learning_rate": 2.8847303551131957e-05, + "loss": 0.3294, + "step": 9523, + "teacher_loss": 0.3132280707359314 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.18546488881111145, + "learning_rate": 2.8846430252720788e-05, + "loss": 0.1915, + "step": 9524, + "teacher_loss": 0.19219109416007996 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.4187684655189514, + "learning_rate": 2.8845556636852492e-05, + "loss": 0.3218, + "step": 9525, + "teacher_loss": 0.31099268794059753 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.5659390687942505, + "learning_rate": 2.884468270354709e-05, + "loss": 0.2791, + "step": 9526, + "teacher_loss": 0.24724087119102478 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.43867403268814087, + "learning_rate": 2.8843808452824622e-05, + "loss": 0.3709, + "step": 9527, + "teacher_loss": 0.363379567861557 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.7753872871398926, + "learning_rate": 2.8842933884705132e-05, + "loss": 0.3681, + "step": 9528, + "teacher_loss": 0.32280439138412476 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.2733902931213379, + "learning_rate": 2.8842058999208672e-05, + "loss": 0.1742, + "step": 9529, + "teacher_loss": 0.16316324472427368 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.1690545380115509, + "learning_rate": 2.88411837963553e-05, + "loss": 0.1528, + "step": 9530, + "teacher_loss": 0.15098294615745544 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.41097545623779297, + "learning_rate": 2.8840308276165085e-05, + "loss": 0.2539, + "step": 9531, + "teacher_loss": 0.2364916205406189 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.45074227452278137, + "learning_rate": 2.883943243865809e-05, + "loss": 0.3243, + "step": 9532, + "teacher_loss": 0.3102171719074249 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.6656568646430969, + "learning_rate": 2.8838556283854403e-05, + "loss": 0.2949, + "step": 9533, + "teacher_loss": 0.2536671459674835 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.3068510890007019, + "learning_rate": 2.8837679811774116e-05, + "loss": 0.2532, + "step": 9534, + "teacher_loss": 0.2472032755613327 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.4407150149345398, + "learning_rate": 2.8836803022437316e-05, + "loss": 0.2285, + "step": 9535, + "teacher_loss": 0.20488600432872772 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.32777759432792664, + "learning_rate": 2.8835925915864103e-05, + "loss": 0.2882, + "step": 9536, + "teacher_loss": 0.28377920389175415 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.4718494415283203, + "learning_rate": 2.8835048492074594e-05, + "loss": 0.2811, + "step": 9537, + "teacher_loss": 0.25991135835647583 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.4691272974014282, + "learning_rate": 2.8834170751088906e-05, + "loss": 0.5632, + "step": 9538, + "teacher_loss": 0.573616087436676 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.3983680009841919, + "learning_rate": 2.8833292692927156e-05, + "loss": 0.2079, + "step": 9539, + "teacher_loss": 0.18676550686359406 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.7350836992263794, + "learning_rate": 2.883241431760948e-05, + "loss": 0.227, + "step": 9540, + "teacher_loss": 0.1705557107925415 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.6084218621253967, + "learning_rate": 2.8831535625156013e-05, + "loss": 0.406, + "step": 9541, + "teacher_loss": 0.38354188203811646 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.15079958736896515, + "learning_rate": 2.8830656615586905e-05, + "loss": 0.1956, + "step": 9542, + "teacher_loss": 0.20052430033683777 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 0.423688679933548, + "learning_rate": 2.882977728892231e-05, + "loss": 0.2814, + "step": 9543, + "teacher_loss": 0.26560455560684204 + }, + { + "compression_loss": 0.0, + "epoch": 1.72, + "label_loss": 1.216416358947754, + "learning_rate": 2.8828897645182383e-05, + "loss": 0.4313, + "step": 9544, + "teacher_loss": 0.3441035747528076 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.4851227402687073, + "learning_rate": 2.8828017684387296e-05, + "loss": 0.2141, + "step": 9545, + "teacher_loss": 0.18393851816654205 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.5699038505554199, + "learning_rate": 2.882713740655722e-05, + "loss": 0.4448, + "step": 9546, + "teacher_loss": 0.4308568239212036 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.41542109847068787, + "learning_rate": 2.882625681171234e-05, + "loss": 0.2142, + "step": 9547, + "teacher_loss": 0.19188442826271057 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.46402907371520996, + "learning_rate": 2.8825375899872843e-05, + "loss": 0.3516, + "step": 9548, + "teacher_loss": 0.33909082412719727 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.2189243584871292, + "learning_rate": 2.8824494671058927e-05, + "loss": 0.1673, + "step": 9549, + "teacher_loss": 0.16158095002174377 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.33084186911582947, + "learning_rate": 2.88236131252908e-05, + "loss": 0.2669, + "step": 9550, + "teacher_loss": 0.25983017683029175 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.2684520184993744, + "learning_rate": 2.8822731262588666e-05, + "loss": 0.1999, + "step": 9551, + "teacher_loss": 0.1922830045223236 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.3193325698375702, + "learning_rate": 2.882184908297275e-05, + "loss": 0.355, + "step": 9552, + "teacher_loss": 0.35893672704696655 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.6582856178283691, + "learning_rate": 2.8820966586463272e-05, + "loss": 0.3184, + "step": 9553, + "teacher_loss": 0.28059476613998413 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.7786076664924622, + "learning_rate": 2.8820083773080468e-05, + "loss": 0.5445, + "step": 9554, + "teacher_loss": 0.5184370875358582 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.5447124242782593, + "learning_rate": 2.8819200642844576e-05, + "loss": 0.3995, + "step": 9555, + "teacher_loss": 0.38336145877838135 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.7425686120986938, + "learning_rate": 2.8818317195775848e-05, + "loss": 0.2938, + "step": 9556, + "teacher_loss": 0.24389465153217316 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.31502705812454224, + "learning_rate": 2.8817433431894537e-05, + "loss": 0.3006, + "step": 9557, + "teacher_loss": 0.29899781942367554 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.34349325299263, + "learning_rate": 2.8816549351220902e-05, + "loss": 0.4821, + "step": 9558, + "teacher_loss": 0.49752652645111084 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.20276644825935364, + "learning_rate": 2.8815664953775213e-05, + "loss": 0.2418, + "step": 9559, + "teacher_loss": 0.24610371887683868 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.38882219791412354, + "learning_rate": 2.8814780239577753e-05, + "loss": 0.2335, + "step": 9560, + "teacher_loss": 0.21620629727840424 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.6596754193305969, + "learning_rate": 2.8813895208648794e-05, + "loss": 0.2348, + "step": 9561, + "teacher_loss": 0.1875821053981781 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.4240890145301819, + "learning_rate": 2.881300986100864e-05, + "loss": 0.3894, + "step": 9562, + "teacher_loss": 0.385598361492157 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.28374576568603516, + "learning_rate": 2.8812124196677585e-05, + "loss": 0.2269, + "step": 9563, + "teacher_loss": 0.22059650719165802 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.4922718405723572, + "learning_rate": 2.881123821567593e-05, + "loss": 0.2571, + "step": 9564, + "teacher_loss": 0.230948805809021 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.34443995356559753, + "learning_rate": 2.8810351918023992e-05, + "loss": 0.2683, + "step": 9565, + "teacher_loss": 0.25980913639068604 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.4274241626262665, + "learning_rate": 2.8809465303742088e-05, + "loss": 0.1775, + "step": 9566, + "teacher_loss": 0.1497097611427307 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.592334508895874, + "learning_rate": 2.8808578372850544e-05, + "loss": 0.2993, + "step": 9567, + "teacher_loss": 0.2667018473148346 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.25950899720191956, + "learning_rate": 2.8807691125369706e-05, + "loss": 0.2096, + "step": 9568, + "teacher_loss": 0.20408445596694946 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.5297172665596008, + "learning_rate": 2.8806803561319903e-05, + "loss": 0.3497, + "step": 9569, + "teacher_loss": 0.32970625162124634 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.7672590017318726, + "learning_rate": 2.8805915680721493e-05, + "loss": 0.324, + "step": 9570, + "teacher_loss": 0.274752676486969 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.17691412568092346, + "learning_rate": 2.8805027483594827e-05, + "loss": 0.1736, + "step": 9571, + "teacher_loss": 0.17319121956825256 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.8751059770584106, + "learning_rate": 2.880413896996027e-05, + "loss": 0.4666, + "step": 9572, + "teacher_loss": 0.42123743891716003 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.5520852208137512, + "learning_rate": 2.8803250139838196e-05, + "loss": 0.4019, + "step": 9573, + "teacher_loss": 0.3852146565914154 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.46499693393707275, + "learning_rate": 2.8802360993248972e-05, + "loss": 0.234, + "step": 9574, + "teacher_loss": 0.20837755501270294 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.2144802361726761, + "learning_rate": 2.8801471530213002e-05, + "loss": 0.2457, + "step": 9575, + "teacher_loss": 0.2491755336523056 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.31127727031707764, + "learning_rate": 2.8800581750750666e-05, + "loss": 0.239, + "step": 9576, + "teacher_loss": 0.23097771406173706 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.35348987579345703, + "learning_rate": 2.8799691654882365e-05, + "loss": 0.1915, + "step": 9577, + "teacher_loss": 0.1735423505306244 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.27336373925209045, + "learning_rate": 2.8798801242628504e-05, + "loss": 0.1837, + "step": 9578, + "teacher_loss": 0.1737854927778244 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.22664445638656616, + "learning_rate": 2.8797910514009508e-05, + "loss": 0.2718, + "step": 9579, + "teacher_loss": 0.2767956852912903 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.4108770787715912, + "learning_rate": 2.8797019469045793e-05, + "loss": 0.2701, + "step": 9580, + "teacher_loss": 0.25441914796829224 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.7577189207077026, + "learning_rate": 2.8796128107757784e-05, + "loss": 0.3789, + "step": 9581, + "teacher_loss": 0.3368479013442993 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.14617453515529633, + "learning_rate": 2.879523643016592e-05, + "loss": 0.2945, + "step": 9582, + "teacher_loss": 0.3109666705131531 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.40665876865386963, + "learning_rate": 2.8794344436290645e-05, + "loss": 0.2607, + "step": 9583, + "teacher_loss": 0.2445174604654312 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 1.2795318365097046, + "learning_rate": 2.879345212615241e-05, + "loss": 0.4575, + "step": 9584, + "teacher_loss": 0.3661792278289795 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.5661550760269165, + "learning_rate": 2.8792559499771672e-05, + "loss": 0.4207, + "step": 9585, + "teacher_loss": 0.40448668599128723 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.5286970138549805, + "learning_rate": 2.8791666557168902e-05, + "loss": 0.3959, + "step": 9586, + "teacher_loss": 0.3811742961406708 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.47100502252578735, + "learning_rate": 2.8790773298364562e-05, + "loss": 0.3745, + "step": 9587, + "teacher_loss": 0.36382627487182617 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.5432640314102173, + "learning_rate": 2.8789879723379137e-05, + "loss": 0.2478, + "step": 9588, + "teacher_loss": 0.215023010969162 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.3296983540058136, + "learning_rate": 2.8788985832233117e-05, + "loss": 0.3359, + "step": 9589, + "teacher_loss": 0.3365897536277771 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.9270901679992676, + "learning_rate": 2.8788091624946995e-05, + "loss": 0.3916, + "step": 9590, + "teacher_loss": 0.33212241530418396 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.5110504627227783, + "learning_rate": 2.8787197101541266e-05, + "loss": 0.2536, + "step": 9591, + "teacher_loss": 0.22494640946388245 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.38014084100723267, + "learning_rate": 2.8786302262036447e-05, + "loss": 0.2319, + "step": 9592, + "teacher_loss": 0.21537455916404724 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.6757920980453491, + "learning_rate": 2.878540710645305e-05, + "loss": 0.2849, + "step": 9593, + "teacher_loss": 0.24151358008384705 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.14654678106307983, + "learning_rate": 2.87845116348116e-05, + "loss": 0.2087, + "step": 9594, + "teacher_loss": 0.21561157703399658 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.5182464122772217, + "learning_rate": 2.878361584713262e-05, + "loss": 0.3011, + "step": 9595, + "teacher_loss": 0.27693048119544983 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.6885359883308411, + "learning_rate": 2.8782719743436657e-05, + "loss": 0.4473, + "step": 9596, + "teacher_loss": 0.42053189873695374 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.7021652460098267, + "learning_rate": 2.8781823323744255e-05, + "loss": 0.3936, + "step": 9597, + "teacher_loss": 0.3593422770500183 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.29757004976272583, + "learning_rate": 2.8780926588075966e-05, + "loss": 0.3605, + "step": 9598, + "teacher_loss": 0.3674991726875305 + }, + { + "compression_loss": 0.0, + "epoch": 1.73, + "label_loss": 0.5416845679283142, + "learning_rate": 2.8780029536452347e-05, + "loss": 0.2177, + "step": 9599, + "teacher_loss": 0.18169677257537842 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.33476996421813965, + "learning_rate": 2.8779132168893963e-05, + "loss": 0.3161, + "step": 9600, + "teacher_loss": 0.31397363543510437 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.16670675575733185, + "learning_rate": 2.877823448542139e-05, + "loss": 0.1677, + "step": 9601, + "teacher_loss": 0.16783848404884338 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.5534331798553467, + "learning_rate": 2.877733648605521e-05, + "loss": 0.2396, + "step": 9602, + "teacher_loss": 0.20473191142082214 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.4948105216026306, + "learning_rate": 2.8776438170816013e-05, + "loss": 0.2355, + "step": 9603, + "teacher_loss": 0.20664048194885254 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.5813813209533691, + "learning_rate": 2.877553953972439e-05, + "loss": 0.2857, + "step": 9604, + "teacher_loss": 0.25284358859062195 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 1.0607283115386963, + "learning_rate": 2.8774640592800948e-05, + "loss": 0.7659, + "step": 9605, + "teacher_loss": 0.7331167459487915 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.4963480532169342, + "learning_rate": 2.8773741330066297e-05, + "loss": 0.3471, + "step": 9606, + "teacher_loss": 0.33046776056289673 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.9289522171020508, + "learning_rate": 2.8772841751541054e-05, + "loss": 0.3532, + "step": 9607, + "teacher_loss": 0.2891795039176941 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.3674055337905884, + "learning_rate": 2.8771941857245837e-05, + "loss": 0.2434, + "step": 9608, + "teacher_loss": 0.22962608933448792 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.6558855175971985, + "learning_rate": 2.8771041647201292e-05, + "loss": 0.3122, + "step": 9609, + "teacher_loss": 0.2740171253681183 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.29457545280456543, + "learning_rate": 2.877014112142804e-05, + "loss": 0.2371, + "step": 9610, + "teacher_loss": 0.23066972196102142 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.8390344381332397, + "learning_rate": 2.8769240279946742e-05, + "loss": 0.5272, + "step": 9611, + "teacher_loss": 0.4925941824913025 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.42375820875167847, + "learning_rate": 2.876833912277805e-05, + "loss": 0.2915, + "step": 9612, + "teacher_loss": 0.2768496870994568 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.3474855422973633, + "learning_rate": 2.8767437649942617e-05, + "loss": 0.2979, + "step": 9613, + "teacher_loss": 0.29241716861724854 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.5069116950035095, + "learning_rate": 2.8766535861461116e-05, + "loss": 0.3142, + "step": 9614, + "teacher_loss": 0.2928082346916199 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.4472291171550751, + "learning_rate": 2.8765633757354223e-05, + "loss": 0.2217, + "step": 9615, + "teacher_loss": 0.1966659277677536 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.32911890745162964, + "learning_rate": 2.876473133764262e-05, + "loss": 0.2176, + "step": 9616, + "teacher_loss": 0.20516744256019592 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.184114009141922, + "learning_rate": 2.8763828602346993e-05, + "loss": 0.1609, + "step": 9617, + "teacher_loss": 0.15837319195270538 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.41836196184158325, + "learning_rate": 2.8762925551488048e-05, + "loss": 0.281, + "step": 9618, + "teacher_loss": 0.26577508449554443 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.8304084539413452, + "learning_rate": 2.8762022185086477e-05, + "loss": 0.3073, + "step": 9619, + "teacher_loss": 0.24922871589660645 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.14868196845054626, + "learning_rate": 2.8761118503163003e-05, + "loss": 0.1784, + "step": 9620, + "teacher_loss": 0.1817294806241989 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.6242039799690247, + "learning_rate": 2.8760214505738336e-05, + "loss": 0.2643, + "step": 9621, + "teacher_loss": 0.22429701685905457 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.4024398922920227, + "learning_rate": 2.8759310192833212e-05, + "loss": 0.3543, + "step": 9622, + "teacher_loss": 0.3489968180656433 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.5162615776062012, + "learning_rate": 2.875840556446835e-05, + "loss": 0.1828, + "step": 9623, + "teacher_loss": 0.14573538303375244 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.1622384786605835, + "learning_rate": 2.87575006206645e-05, + "loss": 0.2592, + "step": 9624, + "teacher_loss": 0.27002087235450745 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.6179137229919434, + "learning_rate": 2.875659536144241e-05, + "loss": 0.2726, + "step": 9625, + "teacher_loss": 0.2341836392879486 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.532171368598938, + "learning_rate": 2.8755689786822833e-05, + "loss": 0.2629, + "step": 9626, + "teacher_loss": 0.23299822211265564 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.334453284740448, + "learning_rate": 2.875478389682653e-05, + "loss": 0.2303, + "step": 9627, + "teacher_loss": 0.21875648200511932 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.1583271324634552, + "learning_rate": 2.8753877691474272e-05, + "loss": 0.2029, + "step": 9628, + "teacher_loss": 0.20785056054592133 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.5318273305892944, + "learning_rate": 2.8752971170786834e-05, + "loss": 0.2488, + "step": 9629, + "teacher_loss": 0.2173895537853241 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.4228849411010742, + "learning_rate": 2.8752064334785002e-05, + "loss": 0.2508, + "step": 9630, + "teacher_loss": 0.23163577914237976 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.834248423576355, + "learning_rate": 2.875115718348956e-05, + "loss": 0.3979, + "step": 9631, + "teacher_loss": 0.3493949770927429 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.6110309362411499, + "learning_rate": 2.875024971692132e-05, + "loss": 0.2373, + "step": 9632, + "teacher_loss": 0.19582784175872803 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 1.1099125146865845, + "learning_rate": 2.8749341935101075e-05, + "loss": 0.3327, + "step": 9633, + "teacher_loss": 0.24632304906845093 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.23753051459789276, + "learning_rate": 2.8748433838049642e-05, + "loss": 0.1976, + "step": 9634, + "teacher_loss": 0.19317740201950073 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.6427618265151978, + "learning_rate": 2.8747525425787844e-05, + "loss": 0.3446, + "step": 9635, + "teacher_loss": 0.31146469712257385 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.5272940397262573, + "learning_rate": 2.8746616698336504e-05, + "loss": 0.392, + "step": 9636, + "teacher_loss": 0.3769666254520416 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.2316398024559021, + "learning_rate": 2.8745707655716455e-05, + "loss": 0.1885, + "step": 9637, + "teacher_loss": 0.1837473213672638 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.3742293119430542, + "learning_rate": 2.8744798297948546e-05, + "loss": 0.2874, + "step": 9638, + "teacher_loss": 0.27777624130249023 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.3067373037338257, + "learning_rate": 2.8743888625053613e-05, + "loss": 0.2842, + "step": 9639, + "teacher_loss": 0.2817382514476776 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.43746474385261536, + "learning_rate": 2.874297863705253e-05, + "loss": 0.284, + "step": 9640, + "teacher_loss": 0.2668987214565277 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.4802393317222595, + "learning_rate": 2.874206833396614e-05, + "loss": 0.2012, + "step": 9641, + "teacher_loss": 0.1702032834291458 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 1.6658446788787842, + "learning_rate": 2.8741157715815334e-05, + "loss": 0.4463, + "step": 9642, + "teacher_loss": 0.3108043670654297 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.6385030746459961, + "learning_rate": 2.8740246782620973e-05, + "loss": 0.3514, + "step": 9643, + "teacher_loss": 0.31951475143432617 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.3685276508331299, + "learning_rate": 2.8739335534403952e-05, + "loss": 0.2493, + "step": 9644, + "teacher_loss": 0.236043319106102 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.22222258150577545, + "learning_rate": 2.873842397118516e-05, + "loss": 0.2187, + "step": 9645, + "teacher_loss": 0.2183527797460556 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.9464833736419678, + "learning_rate": 2.873751209298549e-05, + "loss": 0.3562, + "step": 9646, + "teacher_loss": 0.2906343638896942 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.5458304286003113, + "learning_rate": 2.873659989982586e-05, + "loss": 0.5585, + "step": 9647, + "teacher_loss": 0.5599138736724854 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.6588068008422852, + "learning_rate": 2.873568739172718e-05, + "loss": 0.3182, + "step": 9648, + "teacher_loss": 0.28034892678260803 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.555245041847229, + "learning_rate": 2.8734774568710372e-05, + "loss": 0.3897, + "step": 9649, + "teacher_loss": 0.3712575137615204 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.36945241689682007, + "learning_rate": 2.8733861430796355e-05, + "loss": 0.2855, + "step": 9650, + "teacher_loss": 0.2761967182159424 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.43980157375335693, + "learning_rate": 2.8732947978006073e-05, + "loss": 0.3105, + "step": 9651, + "teacher_loss": 0.2960885763168335 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.8024044036865234, + "learning_rate": 2.873203421036047e-05, + "loss": 0.3599, + "step": 9652, + "teacher_loss": 0.31072884798049927 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.20619887113571167, + "learning_rate": 2.8731120127880496e-05, + "loss": 0.2096, + "step": 9653, + "teacher_loss": 0.20994935929775238 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.2963258624076843, + "learning_rate": 2.87302057305871e-05, + "loss": 0.2141, + "step": 9654, + "teacher_loss": 0.20498046278953552 + }, + { + "compression_loss": 0.0, + "epoch": 1.74, + "label_loss": 0.3191768527030945, + "learning_rate": 2.8729291018501258e-05, + "loss": 0.2174, + "step": 9655, + "teacher_loss": 0.20611363649368286 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.32053327560424805, + "learning_rate": 2.872837599164393e-05, + "loss": 0.204, + "step": 9656, + "teacher_loss": 0.19110167026519775 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.2586548626422882, + "learning_rate": 2.8727460650036102e-05, + "loss": 0.2418, + "step": 9657, + "teacher_loss": 0.23990559577941895 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.527611255645752, + "learning_rate": 2.872654499369876e-05, + "loss": 0.3257, + "step": 9658, + "teacher_loss": 0.3032572567462921 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.42819738388061523, + "learning_rate": 2.8725629022652894e-05, + "loss": 0.2825, + "step": 9659, + "teacher_loss": 0.26631683111190796 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.3624469041824341, + "learning_rate": 2.8724712736919503e-05, + "loss": 0.2347, + "step": 9660, + "teacher_loss": 0.22052177786827087 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.47687751054763794, + "learning_rate": 2.8723796136519604e-05, + "loss": 0.3031, + "step": 9661, + "teacher_loss": 0.2838353216648102 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.4230602979660034, + "learning_rate": 2.87228792214742e-05, + "loss": 0.335, + "step": 9662, + "teacher_loss": 0.3251716196537018 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.5529975891113281, + "learning_rate": 2.8721961991804322e-05, + "loss": 0.2895, + "step": 9663, + "teacher_loss": 0.26019471883773804 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.610116720199585, + "learning_rate": 2.8721044447530998e-05, + "loss": 0.2594, + "step": 9664, + "teacher_loss": 0.2204609215259552 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.31353241205215454, + "learning_rate": 2.8720126588675258e-05, + "loss": 0.3551, + "step": 9665, + "teacher_loss": 0.359742134809494 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.24362307786941528, + "learning_rate": 2.8719208415258153e-05, + "loss": 0.211, + "step": 9666, + "teacher_loss": 0.2073325514793396 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.2073344886302948, + "learning_rate": 2.871828992730073e-05, + "loss": 0.3241, + "step": 9667, + "teacher_loss": 0.33704957365989685 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.8534784913063049, + "learning_rate": 2.871737112482405e-05, + "loss": 0.3936, + "step": 9668, + "teacher_loss": 0.34253209829330444 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.19210059940814972, + "learning_rate": 2.8716452007849177e-05, + "loss": 0.2519, + "step": 9669, + "teacher_loss": 0.2585349380970001 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.3710358738899231, + "learning_rate": 2.8715532576397187e-05, + "loss": 0.4822, + "step": 9670, + "teacher_loss": 0.4945949912071228 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.6151652336120605, + "learning_rate": 2.8714612830489148e-05, + "loss": 0.348, + "step": 9671, + "teacher_loss": 0.3183022737503052 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.34702199697494507, + "learning_rate": 2.8713692770146158e-05, + "loss": 0.2069, + "step": 9672, + "teacher_loss": 0.1913062483072281 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.5169847011566162, + "learning_rate": 2.8712772395389312e-05, + "loss": 0.2455, + "step": 9673, + "teacher_loss": 0.21538686752319336 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.674384593963623, + "learning_rate": 2.8711851706239705e-05, + "loss": 0.3621, + "step": 9674, + "teacher_loss": 0.32739055156707764 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.32784146070480347, + "learning_rate": 2.871093070271845e-05, + "loss": 0.2344, + "step": 9675, + "teacher_loss": 0.22406092286109924 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.4151034951210022, + "learning_rate": 2.8710009384846664e-05, + "loss": 0.2134, + "step": 9676, + "teacher_loss": 0.19093704223632812 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.28529709577560425, + "learning_rate": 2.8709087752645464e-05, + "loss": 0.1956, + "step": 9677, + "teacher_loss": 0.1856432408094406 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.27403947710990906, + "learning_rate": 2.8708165806135986e-05, + "loss": 0.1814, + "step": 9678, + "teacher_loss": 0.1711527705192566 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.2687414884567261, + "learning_rate": 2.8707243545339364e-05, + "loss": 0.2112, + "step": 9679, + "teacher_loss": 0.20479148626327515 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.5349726676940918, + "learning_rate": 2.870632097027674e-05, + "loss": 0.2526, + "step": 9680, + "teacher_loss": 0.22120168805122375 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.5195779204368591, + "learning_rate": 2.8705398080969274e-05, + "loss": 0.267, + "step": 9681, + "teacher_loss": 0.23892182111740112 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.313081830739975, + "learning_rate": 2.870447487743812e-05, + "loss": 0.258, + "step": 9682, + "teacher_loss": 0.2519094944000244 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 1.0189120769500732, + "learning_rate": 2.8703551359704447e-05, + "loss": 0.4375, + "step": 9683, + "teacher_loss": 0.3729253113269806 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.5007901787757874, + "learning_rate": 2.8702627527789427e-05, + "loss": 0.2954, + "step": 9684, + "teacher_loss": 0.27253109216690063 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.2311154305934906, + "learning_rate": 2.8701703381714237e-05, + "loss": 0.1716, + "step": 9685, + "teacher_loss": 0.16503441333770752 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.6135351657867432, + "learning_rate": 2.870077892150007e-05, + "loss": 0.3598, + "step": 9686, + "teacher_loss": 0.33164626359939575 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.33166125416755676, + "learning_rate": 2.869985414716812e-05, + "loss": 0.2115, + "step": 9687, + "teacher_loss": 0.1980951875448227 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.3468107581138611, + "learning_rate": 2.8698929058739587e-05, + "loss": 0.2286, + "step": 9688, + "teacher_loss": 0.21543839573860168 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.5006410479545593, + "learning_rate": 2.8698003656235686e-05, + "loss": 0.5066, + "step": 9689, + "teacher_loss": 0.5072280168533325 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.3160492777824402, + "learning_rate": 2.8697077939677627e-05, + "loss": 0.1964, + "step": 9690, + "teacher_loss": 0.18307873606681824 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.41973501443862915, + "learning_rate": 2.8696151909086633e-05, + "loss": 0.2339, + "step": 9691, + "teacher_loss": 0.21327200531959534 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.4365221858024597, + "learning_rate": 2.869522556448395e-05, + "loss": 0.2685, + "step": 9692, + "teacher_loss": 0.2498350739479065 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.6558871269226074, + "learning_rate": 2.8694298905890795e-05, + "loss": 0.3587, + "step": 9693, + "teacher_loss": 0.32568246126174927 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.5029205679893494, + "learning_rate": 2.8693371933328426e-05, + "loss": 0.3843, + "step": 9694, + "teacher_loss": 0.3711155652999878 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.4935513734817505, + "learning_rate": 2.8692444646818096e-05, + "loss": 0.2867, + "step": 9695, + "teacher_loss": 0.26376235485076904 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.4425468444824219, + "learning_rate": 2.8691517046381062e-05, + "loss": 0.3191, + "step": 9696, + "teacher_loss": 0.30540698766708374 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.5071808099746704, + "learning_rate": 2.869058913203859e-05, + "loss": 0.2612, + "step": 9697, + "teacher_loss": 0.23386384546756744 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.5825119614601135, + "learning_rate": 2.8689660903811956e-05, + "loss": 0.3148, + "step": 9698, + "teacher_loss": 0.2850485146045685 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.39906707406044006, + "learning_rate": 2.8688732361722438e-05, + "loss": 0.3074, + "step": 9699, + "teacher_loss": 0.2971632480621338 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 1.064254879951477, + "learning_rate": 2.868780350579133e-05, + "loss": 0.4063, + "step": 9700, + "teacher_loss": 0.3331586718559265 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.4207301139831543, + "learning_rate": 2.868687433603993e-05, + "loss": 0.3103, + "step": 9701, + "teacher_loss": 0.2980774939060211 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.5560793876647949, + "learning_rate": 2.8685944852489533e-05, + "loss": 0.2688, + "step": 9702, + "teacher_loss": 0.2368604689836502 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.5776731371879578, + "learning_rate": 2.8685015055161454e-05, + "loss": 0.4198, + "step": 9703, + "teacher_loss": 0.40221595764160156 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.4112074375152588, + "learning_rate": 2.8684084944077012e-05, + "loss": 0.2462, + "step": 9704, + "teacher_loss": 0.22789642214775085 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.2412782907485962, + "learning_rate": 2.8683154519257523e-05, + "loss": 0.2392, + "step": 9705, + "teacher_loss": 0.23902060091495514 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.43352025747299194, + "learning_rate": 2.868222378072433e-05, + "loss": 0.3681, + "step": 9706, + "teacher_loss": 0.3607964515686035 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.4817594587802887, + "learning_rate": 2.8681292728498768e-05, + "loss": 0.3523, + "step": 9707, + "teacher_loss": 0.3378788232803345 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.5018646121025085, + "learning_rate": 2.8680361362602178e-05, + "loss": 0.2811, + "step": 9708, + "teacher_loss": 0.2565270960330963 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.3221518397331238, + "learning_rate": 2.8679429683055918e-05, + "loss": 0.2118, + "step": 9709, + "teacher_loss": 0.1995304375886917 + }, + { + "compression_loss": 0.0, + "epoch": 1.75, + "label_loss": 0.25086843967437744, + "learning_rate": 2.8678497689881354e-05, + "loss": 0.1874, + "step": 9710, + "teacher_loss": 0.1803758293390274 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.3539390563964844, + "learning_rate": 2.8677565383099845e-05, + "loss": 0.4457, + "step": 9711, + "teacher_loss": 0.455923855304718 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.9557262659072876, + "learning_rate": 2.8676632762732762e-05, + "loss": 0.5112, + "step": 9712, + "teacher_loss": 0.46177637577056885 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.5417178869247437, + "learning_rate": 2.8675699828801506e-05, + "loss": 0.2265, + "step": 9713, + "teacher_loss": 0.19149817526340485 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.6044718027114868, + "learning_rate": 2.867476658132745e-05, + "loss": 0.2619, + "step": 9714, + "teacher_loss": 0.22383756935596466 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.2758791744709015, + "learning_rate": 2.8673833020331993e-05, + "loss": 0.2405, + "step": 9715, + "teacher_loss": 0.23660224676132202 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.9286810159683228, + "learning_rate": 2.8672899145836548e-05, + "loss": 0.4207, + "step": 9716, + "teacher_loss": 0.3642313480377197 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.669777512550354, + "learning_rate": 2.8671964957862517e-05, + "loss": 0.5166, + "step": 9717, + "teacher_loss": 0.49958646297454834 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.2174762636423111, + "learning_rate": 2.8671030456431313e-05, + "loss": 0.182, + "step": 9718, + "teacher_loss": 0.17805570363998413 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.39027640223503113, + "learning_rate": 2.8670095641564375e-05, + "loss": 0.2282, + "step": 9719, + "teacher_loss": 0.21022240817546844 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.5518113970756531, + "learning_rate": 2.8669160513283125e-05, + "loss": 0.2679, + "step": 9720, + "teacher_loss": 0.23632583022117615 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.6897218227386475, + "learning_rate": 2.8668225071609012e-05, + "loss": 0.3779, + "step": 9721, + "teacher_loss": 0.3432038128376007 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.34343016147613525, + "learning_rate": 2.8667289316563476e-05, + "loss": 0.2399, + "step": 9722, + "teacher_loss": 0.22843725979328156 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.4929710626602173, + "learning_rate": 2.8666353248167967e-05, + "loss": 0.2421, + "step": 9723, + "teacher_loss": 0.21427667140960693 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.6443290710449219, + "learning_rate": 2.866541686644396e-05, + "loss": 0.2804, + "step": 9724, + "teacher_loss": 0.2399246096611023 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.4794856309890747, + "learning_rate": 2.866448017141291e-05, + "loss": 0.3024, + "step": 9725, + "teacher_loss": 0.28271445631980896 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.5353676080703735, + "learning_rate": 2.8663543163096298e-05, + "loss": 0.2489, + "step": 9726, + "teacher_loss": 0.21712306141853333 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.8236091732978821, + "learning_rate": 2.866260584151561e-05, + "loss": 0.2857, + "step": 9727, + "teacher_loss": 0.22595667839050293 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.5364168286323547, + "learning_rate": 2.866166820669233e-05, + "loss": 0.2299, + "step": 9728, + "teacher_loss": 0.1958591639995575 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.7631489038467407, + "learning_rate": 2.8660730258647962e-05, + "loss": 0.2712, + "step": 9729, + "teacher_loss": 0.21655318140983582 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.4687633216381073, + "learning_rate": 2.8659791997404e-05, + "loss": 0.2241, + "step": 9730, + "teacher_loss": 0.19693905115127563 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.7576920986175537, + "learning_rate": 2.8658853422981964e-05, + "loss": 0.4193, + "step": 9731, + "teacher_loss": 0.381686806678772 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.23076625168323517, + "learning_rate": 2.865791453540337e-05, + "loss": 0.1948, + "step": 9732, + "teacher_loss": 0.19076424837112427 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.571945309638977, + "learning_rate": 2.8656975334689746e-05, + "loss": 0.2368, + "step": 9733, + "teacher_loss": 0.19953002035617828 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.247168630361557, + "learning_rate": 2.865603582086262e-05, + "loss": 0.1812, + "step": 9734, + "teacher_loss": 0.17381900548934937 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.7607147693634033, + "learning_rate": 2.865509599394354e-05, + "loss": 0.4261, + "step": 9735, + "teacher_loss": 0.38888630270957947 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.3236229717731476, + "learning_rate": 2.8654155853954044e-05, + "loss": 0.2537, + "step": 9736, + "teacher_loss": 0.24590489268302917 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.17992353439331055, + "learning_rate": 2.8653215400915696e-05, + "loss": 0.2032, + "step": 9737, + "teacher_loss": 0.20584172010421753 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.30910223722457886, + "learning_rate": 2.865227463485005e-05, + "loss": 0.3685, + "step": 9738, + "teacher_loss": 0.37504446506500244 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.16428303718566895, + "learning_rate": 2.865133355577868e-05, + "loss": 0.2474, + "step": 9739, + "teacher_loss": 0.2566138505935669 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.7258551120758057, + "learning_rate": 2.8650392163723165e-05, + "loss": 0.3786, + "step": 9740, + "teacher_loss": 0.340040922164917 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.39395958185195923, + "learning_rate": 2.8649450458705076e-05, + "loss": 0.1857, + "step": 9741, + "teacher_loss": 0.1625867486000061 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.2700785994529724, + "learning_rate": 2.8648508440746015e-05, + "loss": 0.2368, + "step": 9742, + "teacher_loss": 0.2331409752368927 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.34350600838661194, + "learning_rate": 2.8647566109867573e-05, + "loss": 0.2873, + "step": 9743, + "teacher_loss": 0.28108030557632446 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.33573177456855774, + "learning_rate": 2.8646623466091362e-05, + "loss": 0.2209, + "step": 9744, + "teacher_loss": 0.20811808109283447 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.3943679928779602, + "learning_rate": 2.864568050943899e-05, + "loss": 0.3507, + "step": 9745, + "teacher_loss": 0.34584128856658936 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.2950679361820221, + "learning_rate": 2.8644737239932072e-05, + "loss": 0.2685, + "step": 9746, + "teacher_loss": 0.2655636966228485 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.5501250624656677, + "learning_rate": 2.8643793657592236e-05, + "loss": 0.2817, + "step": 9747, + "teacher_loss": 0.2518511414527893 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.3824694752693176, + "learning_rate": 2.8642849762441122e-05, + "loss": 0.2985, + "step": 9748, + "teacher_loss": 0.2891874313354492 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.7683556079864502, + "learning_rate": 2.8641905554500366e-05, + "loss": 0.3339, + "step": 9749, + "teacher_loss": 0.2856735587120056 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.3394649624824524, + "learning_rate": 2.8640961033791616e-05, + "loss": 0.287, + "step": 9750, + "teacher_loss": 0.28117120265960693 + }, + { + "epoch": 1.76, + "eval_exact_match": 79.51750236518448, + "eval_f1": 86.93126886079065, + "step": 9750 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.4933737814426422, + "learning_rate": 2.8640016200336527e-05, + "loss": 0.2431, + "step": 9751, + "teacher_loss": 0.21523982286453247 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.29733914136886597, + "learning_rate": 2.8639071054156758e-05, + "loss": 0.2626, + "step": 9752, + "teacher_loss": 0.25875383615493774 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.393619179725647, + "learning_rate": 2.8638125595273984e-05, + "loss": 0.2779, + "step": 9753, + "teacher_loss": 0.2650052607059479 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.6418403387069702, + "learning_rate": 2.8637179823709885e-05, + "loss": 0.3048, + "step": 9754, + "teacher_loss": 0.2673349976539612 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.4987420439720154, + "learning_rate": 2.8636233739486132e-05, + "loss": 0.3131, + "step": 9755, + "teacher_loss": 0.29245319962501526 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.3940998911857605, + "learning_rate": 2.8635287342624425e-05, + "loss": 0.2463, + "step": 9756, + "teacher_loss": 0.22992157936096191 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.6517037153244019, + "learning_rate": 2.863434063314646e-05, + "loss": 0.2252, + "step": 9757, + "teacher_loss": 0.17785786092281342 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.6963098049163818, + "learning_rate": 2.8633393611073943e-05, + "loss": 0.3124, + "step": 9758, + "teacher_loss": 0.2696886956691742 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.17263562977313995, + "learning_rate": 2.8632446276428582e-05, + "loss": 0.1402, + "step": 9759, + "teacher_loss": 0.13657408952713013 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.23942387104034424, + "learning_rate": 2.8631498629232103e-05, + "loss": 0.176, + "step": 9760, + "teacher_loss": 0.168988436460495 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.43425804376602173, + "learning_rate": 2.8630550669506232e-05, + "loss": 0.2447, + "step": 9761, + "teacher_loss": 0.22359436750411987 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.6547526717185974, + "learning_rate": 2.8629602397272696e-05, + "loss": 0.3065, + "step": 9762, + "teacher_loss": 0.2677672505378723 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.8350136280059814, + "learning_rate": 2.8628653812553242e-05, + "loss": 0.352, + "step": 9763, + "teacher_loss": 0.2982976734638214 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.5028699636459351, + "learning_rate": 2.8627704915369622e-05, + "loss": 0.3641, + "step": 9764, + "teacher_loss": 0.34864526987075806 + }, + { + "compression_loss": 0.0, + "epoch": 1.76, + "label_loss": 0.9776653051376343, + "learning_rate": 2.862675570574358e-05, + "loss": 0.3766, + "step": 9765, + "teacher_loss": 0.3098118305206299 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.3517906665802002, + "learning_rate": 2.8625806183696885e-05, + "loss": 0.2689, + "step": 9766, + "teacher_loss": 0.2596439719200134 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.35569554567337036, + "learning_rate": 2.862485634925131e-05, + "loss": 0.2142, + "step": 9767, + "teacher_loss": 0.1984252631664276 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 1.1156789064407349, + "learning_rate": 2.8623906202428628e-05, + "loss": 0.4184, + "step": 9768, + "teacher_loss": 0.3408896327018738 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.31348246335983276, + "learning_rate": 2.8622955743250622e-05, + "loss": 0.3634, + "step": 9769, + "teacher_loss": 0.3689323663711548 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.2325993776321411, + "learning_rate": 2.8622004971739086e-05, + "loss": 0.2801, + "step": 9770, + "teacher_loss": 0.285374253988266 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.6928762793540955, + "learning_rate": 2.8621053887915813e-05, + "loss": 0.3338, + "step": 9771, + "teacher_loss": 0.29392164945602417 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.44200950860977173, + "learning_rate": 2.862010249180262e-05, + "loss": 0.2281, + "step": 9772, + "teacher_loss": 0.2042994201183319 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.7773900032043457, + "learning_rate": 2.8619150783421303e-05, + "loss": 0.5007, + "step": 9773, + "teacher_loss": 0.469971626996994 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.70094233751297, + "learning_rate": 2.8618198762793696e-05, + "loss": 0.3197, + "step": 9774, + "teacher_loss": 0.2773780822753906 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.42249706387519836, + "learning_rate": 2.861724642994162e-05, + "loss": 0.3537, + "step": 9775, + "teacher_loss": 0.3461114168167114 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.4572070837020874, + "learning_rate": 2.861629378488691e-05, + "loss": 0.2855, + "step": 9776, + "teacher_loss": 0.26643991470336914 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.5427082180976868, + "learning_rate": 2.8615340827651407e-05, + "loss": 0.36, + "step": 9777, + "teacher_loss": 0.3397466540336609 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.5354291200637817, + "learning_rate": 2.861438755825696e-05, + "loss": 0.3228, + "step": 9778, + "teacher_loss": 0.29919272661209106 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.9165619611740112, + "learning_rate": 2.8613433976725424e-05, + "loss": 0.2909, + "step": 9779, + "teacher_loss": 0.2213437557220459 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.3289463520050049, + "learning_rate": 2.8612480083078658e-05, + "loss": 0.2108, + "step": 9780, + "teacher_loss": 0.19766588509082794 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.4514867067337036, + "learning_rate": 2.8611525877338543e-05, + "loss": 0.248, + "step": 9781, + "teacher_loss": 0.2253987193107605 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.6315058469772339, + "learning_rate": 2.8610571359526944e-05, + "loss": 0.2889, + "step": 9782, + "teacher_loss": 0.25080764293670654 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.5729396343231201, + "learning_rate": 2.8609616529665753e-05, + "loss": 0.3594, + "step": 9783, + "teacher_loss": 0.3356652855873108 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.5215010643005371, + "learning_rate": 2.860866138777686e-05, + "loss": 0.2557, + "step": 9784, + "teacher_loss": 0.2261490821838379 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.8496921062469482, + "learning_rate": 2.8607705933882157e-05, + "loss": 0.3558, + "step": 9785, + "teacher_loss": 0.30092036724090576 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.3914310336112976, + "learning_rate": 2.860675016800356e-05, + "loss": 0.181, + "step": 9786, + "teacher_loss": 0.15760132670402527 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.7524313926696777, + "learning_rate": 2.8605794090162978e-05, + "loss": 0.3927, + "step": 9787, + "teacher_loss": 0.35273706912994385 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.8007498979568481, + "learning_rate": 2.8604837700382324e-05, + "loss": 0.271, + "step": 9788, + "teacher_loss": 0.21209371089935303 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.2934896945953369, + "learning_rate": 2.8603880998683535e-05, + "loss": 0.2285, + "step": 9789, + "teacher_loss": 0.22125005722045898 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.3135831356048584, + "learning_rate": 2.8602923985088543e-05, + "loss": 0.171, + "step": 9790, + "teacher_loss": 0.15514321625232697 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.7407754063606262, + "learning_rate": 2.8601966659619283e-05, + "loss": 0.3897, + "step": 9791, + "teacher_loss": 0.35070645809173584 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.4148141145706177, + "learning_rate": 2.860100902229771e-05, + "loss": 0.2872, + "step": 9792, + "teacher_loss": 0.2729969024658203 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.6527068614959717, + "learning_rate": 2.8600051073145778e-05, + "loss": 0.3029, + "step": 9793, + "teacher_loss": 0.2640741467475891 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.403251588344574, + "learning_rate": 2.8599092812185453e-05, + "loss": 0.2835, + "step": 9794, + "teacher_loss": 0.27024146914482117 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.6129017472267151, + "learning_rate": 2.8598134239438697e-05, + "loss": 0.3429, + "step": 9795, + "teacher_loss": 0.31294164061546326 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.6439265012741089, + "learning_rate": 2.8597175354927492e-05, + "loss": 0.4273, + "step": 9796, + "teacher_loss": 0.4032716155052185 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.5898388624191284, + "learning_rate": 2.8596216158673828e-05, + "loss": 0.3301, + "step": 9797, + "teacher_loss": 0.3012082874774933 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.281993567943573, + "learning_rate": 2.859525665069969e-05, + "loss": 0.2386, + "step": 9798, + "teacher_loss": 0.2337360978126526 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.5858614444732666, + "learning_rate": 2.8594296831027075e-05, + "loss": 0.3866, + "step": 9799, + "teacher_loss": 0.36444640159606934 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.7638803720474243, + "learning_rate": 2.859333669967799e-05, + "loss": 0.2725, + "step": 9800, + "teacher_loss": 0.21795490384101868 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.128788560628891, + "learning_rate": 2.8592376256674455e-05, + "loss": 0.1834, + "step": 9801, + "teacher_loss": 0.18948069214820862 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.12381817400455475, + "learning_rate": 2.8591415502038477e-05, + "loss": 0.2161, + "step": 9802, + "teacher_loss": 0.22639340162277222 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.525272786617279, + "learning_rate": 2.8590454435792096e-05, + "loss": 0.2757, + "step": 9803, + "teacher_loss": 0.2480016052722931 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.31802237033843994, + "learning_rate": 2.8589493057957337e-05, + "loss": 0.3262, + "step": 9804, + "teacher_loss": 0.3271319270133972 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 1.077929973602295, + "learning_rate": 2.8588531368556254e-05, + "loss": 0.3946, + "step": 9805, + "teacher_loss": 0.3186517059803009 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.36122673749923706, + "learning_rate": 2.8587569367610882e-05, + "loss": 0.2614, + "step": 9806, + "teacher_loss": 0.2502981424331665 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.2923242449760437, + "learning_rate": 2.858660705514328e-05, + "loss": 0.2469, + "step": 9807, + "teacher_loss": 0.2418815940618515 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.4481073021888733, + "learning_rate": 2.858564443117551e-05, + "loss": 0.3191, + "step": 9808, + "teacher_loss": 0.3047882914543152 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.4928377866744995, + "learning_rate": 2.8584681495729657e-05, + "loss": 0.2201, + "step": 9809, + "teacher_loss": 0.18981999158859253 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.25782811641693115, + "learning_rate": 2.8583718248827778e-05, + "loss": 0.3193, + "step": 9810, + "teacher_loss": 0.32612496614456177 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.4052194058895111, + "learning_rate": 2.8582754690491966e-05, + "loss": 0.2999, + "step": 9811, + "teacher_loss": 0.28816381096839905 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.269300252199173, + "learning_rate": 2.8581790820744315e-05, + "loss": 0.2201, + "step": 9812, + "teacher_loss": 0.21467384696006775 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.2831853926181793, + "learning_rate": 2.8580826639606915e-05, + "loss": 0.1787, + "step": 9813, + "teacher_loss": 0.16705730557441711 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.28005528450012207, + "learning_rate": 2.8579862147101884e-05, + "loss": 0.2516, + "step": 9814, + "teacher_loss": 0.24841713905334473 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.46240460872650146, + "learning_rate": 2.8578897343251327e-05, + "loss": 0.3808, + "step": 9815, + "teacher_loss": 0.37174704670906067 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.9807002544403076, + "learning_rate": 2.8577932228077364e-05, + "loss": 0.3343, + "step": 9816, + "teacher_loss": 0.2624368965625763 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.47120124101638794, + "learning_rate": 2.8576966801602123e-05, + "loss": 0.3385, + "step": 9817, + "teacher_loss": 0.3237246870994568 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.21141573786735535, + "learning_rate": 2.8576001063847743e-05, + "loss": 0.2297, + "step": 9818, + "teacher_loss": 0.23177461326122284 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.206715390086174, + "learning_rate": 2.8575035014836354e-05, + "loss": 0.2964, + "step": 9819, + "teacher_loss": 0.30636298656463623 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.3624690771102905, + "learning_rate": 2.8574068654590118e-05, + "loss": 0.4137, + "step": 9820, + "teacher_loss": 0.41935306787490845 + }, + { + "compression_loss": 0.0, + "epoch": 1.77, + "label_loss": 0.6026482582092285, + "learning_rate": 2.8573101983131184e-05, + "loss": 0.4615, + "step": 9821, + "teacher_loss": 0.4458516538143158 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.4024810194969177, + "learning_rate": 2.8572135000481717e-05, + "loss": 0.2758, + "step": 9822, + "teacher_loss": 0.26166924834251404 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.20206865668296814, + "learning_rate": 2.857116770666389e-05, + "loss": 0.2337, + "step": 9823, + "teacher_loss": 0.237208291888237 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.7365269660949707, + "learning_rate": 2.8570200101699868e-05, + "loss": 0.2601, + "step": 9824, + "teacher_loss": 0.20721282064914703 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.3817863464355469, + "learning_rate": 2.8569232185611843e-05, + "loss": 0.2581, + "step": 9825, + "teacher_loss": 0.24433547258377075 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.4644962251186371, + "learning_rate": 2.8568263958422013e-05, + "loss": 0.2864, + "step": 9826, + "teacher_loss": 0.26657551527023315 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.36981451511383057, + "learning_rate": 2.8567295420152567e-05, + "loss": 0.28, + "step": 9827, + "teacher_loss": 0.27003952860832214 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.607772946357727, + "learning_rate": 2.856632657082571e-05, + "loss": 0.302, + "step": 9828, + "teacher_loss": 0.2680475115776062 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.7324094772338867, + "learning_rate": 2.8565357410463664e-05, + "loss": 0.3301, + "step": 9829, + "teacher_loss": 0.2854401469230652 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.6441067457199097, + "learning_rate": 2.856438793908864e-05, + "loss": 0.2728, + "step": 9830, + "teacher_loss": 0.2315816730260849 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.6084052920341492, + "learning_rate": 2.8563418156722875e-05, + "loss": 0.3063, + "step": 9831, + "teacher_loss": 0.2727759778499603 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.718802809715271, + "learning_rate": 2.8562448063388592e-05, + "loss": 0.3303, + "step": 9832, + "teacher_loss": 0.287092924118042 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.27423417568206787, + "learning_rate": 2.8561477659108034e-05, + "loss": 0.2469, + "step": 9833, + "teacher_loss": 0.24389639496803284 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.5913593769073486, + "learning_rate": 2.8560506943903455e-05, + "loss": 0.2845, + "step": 9834, + "teacher_loss": 0.25044262409210205 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.8238702416419983, + "learning_rate": 2.8559535917797114e-05, + "loss": 0.2854, + "step": 9835, + "teacher_loss": 0.22556337714195251 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.40958574414253235, + "learning_rate": 2.855856458081126e-05, + "loss": 0.3014, + "step": 9836, + "teacher_loss": 0.28938421607017517 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.329242467880249, + "learning_rate": 2.8557592932968177e-05, + "loss": 0.3378, + "step": 9837, + "teacher_loss": 0.33874160051345825 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.36360520124435425, + "learning_rate": 2.8556620974290132e-05, + "loss": 0.2673, + "step": 9838, + "teacher_loss": 0.25660085678100586 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.32166025042533875, + "learning_rate": 2.8555648704799418e-05, + "loss": 0.2427, + "step": 9839, + "teacher_loss": 0.23396605253219604 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.3262128233909607, + "learning_rate": 2.8554676124518313e-05, + "loss": 0.1946, + "step": 9840, + "teacher_loss": 0.1800258457660675 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.3956587314605713, + "learning_rate": 2.855370323346913e-05, + "loss": 0.2516, + "step": 9841, + "teacher_loss": 0.23559823632240295 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.2078457474708557, + "learning_rate": 2.8552730031674164e-05, + "loss": 0.2478, + "step": 9842, + "teacher_loss": 0.2521919012069702 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.29933232069015503, + "learning_rate": 2.8551756519155732e-05, + "loss": 0.2719, + "step": 9843, + "teacher_loss": 0.26889461278915405 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.18209177255630493, + "learning_rate": 2.8550782695936156e-05, + "loss": 0.1938, + "step": 9844, + "teacher_loss": 0.19512730836868286 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.815851628780365, + "learning_rate": 2.854980856203776e-05, + "loss": 0.2908, + "step": 9845, + "teacher_loss": 0.23244981467723846 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.5966867804527283, + "learning_rate": 2.8548834117482877e-05, + "loss": 0.334, + "step": 9846, + "teacher_loss": 0.30483123660087585 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.4707828462123871, + "learning_rate": 2.854785936229385e-05, + "loss": 0.2204, + "step": 9847, + "teacher_loss": 0.19260910153388977 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.32042425870895386, + "learning_rate": 2.8546884296493027e-05, + "loss": 0.4107, + "step": 9848, + "teacher_loss": 0.4207236170768738 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.21508243680000305, + "learning_rate": 2.854590892010276e-05, + "loss": 0.2845, + "step": 9849, + "teacher_loss": 0.292217880487442 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.567185640335083, + "learning_rate": 2.8544933233145418e-05, + "loss": 0.3431, + "step": 9850, + "teacher_loss": 0.31823480129241943 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.5142399668693542, + "learning_rate": 2.8543957235643362e-05, + "loss": 0.4037, + "step": 9851, + "teacher_loss": 0.3913862705230713 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.19140490889549255, + "learning_rate": 2.8542980927618974e-05, + "loss": 0.203, + "step": 9852, + "teacher_loss": 0.20426297187805176 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.29847294092178345, + "learning_rate": 2.854200430909464e-05, + "loss": 0.2184, + "step": 9853, + "teacher_loss": 0.20952346920967102 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.7001780867576599, + "learning_rate": 2.8541027380092747e-05, + "loss": 0.4384, + "step": 9854, + "teacher_loss": 0.40936318039894104 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.7761991620063782, + "learning_rate": 2.8540050140635694e-05, + "loss": 0.361, + "step": 9855, + "teacher_loss": 0.314916729927063 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.7685930728912354, + "learning_rate": 2.853907259074589e-05, + "loss": 0.373, + "step": 9856, + "teacher_loss": 0.3290581703186035 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.3320949375629425, + "learning_rate": 2.853809473044574e-05, + "loss": 0.247, + "step": 9857, + "teacher_loss": 0.2374969869852066 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.4057954251766205, + "learning_rate": 2.8537116559757666e-05, + "loss": 0.2801, + "step": 9858, + "teacher_loss": 0.2661496102809906 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.3791216015815735, + "learning_rate": 2.85361380787041e-05, + "loss": 0.213, + "step": 9859, + "teacher_loss": 0.19449107348918915 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.31069695949554443, + "learning_rate": 2.8535159287307466e-05, + "loss": 0.2257, + "step": 9860, + "teacher_loss": 0.21625259518623352 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.8401548862457275, + "learning_rate": 2.853418018559022e-05, + "loss": 0.4218, + "step": 9861, + "teacher_loss": 0.37532132863998413 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.3823752701282501, + "learning_rate": 2.8533200773574787e-05, + "loss": 0.2749, + "step": 9862, + "teacher_loss": 0.2629657983779907 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 1.0753488540649414, + "learning_rate": 2.853222105128364e-05, + "loss": 0.3257, + "step": 9863, + "teacher_loss": 0.2424429953098297 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.5001740455627441, + "learning_rate": 2.853124101873924e-05, + "loss": 0.3137, + "step": 9864, + "teacher_loss": 0.29298165440559387 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 1.1806230545043945, + "learning_rate": 2.8530260675964046e-05, + "loss": 0.5216, + "step": 9865, + "teacher_loss": 0.4483719766139984 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.12440374493598938, + "learning_rate": 2.8529280022980546e-05, + "loss": 0.2351, + "step": 9866, + "teacher_loss": 0.24739037454128265 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.21661391854286194, + "learning_rate": 2.8528299059811215e-05, + "loss": 0.2204, + "step": 9867, + "teacher_loss": 0.22086814045906067 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.5308147668838501, + "learning_rate": 2.852731778647855e-05, + "loss": 0.2288, + "step": 9868, + "teacher_loss": 0.1952137053012848 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.6771795153617859, + "learning_rate": 2.852633620300504e-05, + "loss": 0.3439, + "step": 9869, + "teacher_loss": 0.30682307481765747 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.3790631890296936, + "learning_rate": 2.8525354309413194e-05, + "loss": 0.203, + "step": 9870, + "teacher_loss": 0.18340197205543518 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.6363522410392761, + "learning_rate": 2.852437210572553e-05, + "loss": 0.3238, + "step": 9871, + "teacher_loss": 0.2890468239784241 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.26097410917282104, + "learning_rate": 2.852338959196456e-05, + "loss": 0.2282, + "step": 9872, + "teacher_loss": 0.2245054543018341 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.5878296494483948, + "learning_rate": 2.852240676815281e-05, + "loss": 0.234, + "step": 9873, + "teacher_loss": 0.19466212391853333 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.27538934350013733, + "learning_rate": 2.852142363431282e-05, + "loss": 0.2383, + "step": 9874, + "teacher_loss": 0.2342124879360199 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.45194587111473083, + "learning_rate": 2.852044019046712e-05, + "loss": 0.2452, + "step": 9875, + "teacher_loss": 0.22225746512413025 + }, + { + "compression_loss": 0.0, + "epoch": 1.78, + "label_loss": 0.3667333126068115, + "learning_rate": 2.8519456436638264e-05, + "loss": 0.2073, + "step": 9876, + "teacher_loss": 0.1896015703678131 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.6565039157867432, + "learning_rate": 2.8518472372848807e-05, + "loss": 0.3031, + "step": 9877, + "teacher_loss": 0.26383769512176514 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.5831775665283203, + "learning_rate": 2.851748799912131e-05, + "loss": 0.3206, + "step": 9878, + "teacher_loss": 0.29143059253692627 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.6539947986602783, + "learning_rate": 2.8516503315478335e-05, + "loss": 0.2663, + "step": 9879, + "teacher_loss": 0.2232193648815155 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.5833121538162231, + "learning_rate": 2.8515518321942472e-05, + "loss": 0.2865, + "step": 9880, + "teacher_loss": 0.25355756282806396 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.17323970794677734, + "learning_rate": 2.8514533018536286e-05, + "loss": 0.2257, + "step": 9881, + "teacher_loss": 0.23154215514659882 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 1.0290762186050415, + "learning_rate": 2.851354740528238e-05, + "loss": 0.3406, + "step": 9882, + "teacher_loss": 0.2640581727027893 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.44317227602005005, + "learning_rate": 2.8512561482203353e-05, + "loss": 0.2494, + "step": 9883, + "teacher_loss": 0.2278556078672409 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 1.023535132408142, + "learning_rate": 2.85115752493218e-05, + "loss": 0.4149, + "step": 9884, + "teacher_loss": 0.3473111391067505 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.44727179408073425, + "learning_rate": 2.8510588706660338e-05, + "loss": 0.289, + "step": 9885, + "teacher_loss": 0.27137070894241333 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.9495588541030884, + "learning_rate": 2.8509601854241582e-05, + "loss": 0.3094, + "step": 9886, + "teacher_loss": 0.23825867474079132 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.2791885733604431, + "learning_rate": 2.850861469208816e-05, + "loss": 0.3149, + "step": 9887, + "teacher_loss": 0.3189193904399872 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.4645458459854126, + "learning_rate": 2.8507627220222703e-05, + "loss": 0.2793, + "step": 9888, + "teacher_loss": 0.2587423026561737 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.618566632270813, + "learning_rate": 2.8506639438667853e-05, + "loss": 0.5393, + "step": 9889, + "teacher_loss": 0.5304646492004395 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.4126269817352295, + "learning_rate": 2.850565134744625e-05, + "loss": 0.2857, + "step": 9890, + "teacher_loss": 0.2716103494167328 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.15565399825572968, + "learning_rate": 2.8504662946580563e-05, + "loss": 0.2011, + "step": 9891, + "teacher_loss": 0.20614346861839294 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.2347583770751953, + "learning_rate": 2.8503674236093438e-05, + "loss": 0.2252, + "step": 9892, + "teacher_loss": 0.22415444254875183 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.6579325795173645, + "learning_rate": 2.850268521600755e-05, + "loss": 0.378, + "step": 9893, + "teacher_loss": 0.34686344861984253 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.582531750202179, + "learning_rate": 2.8501695886345574e-05, + "loss": 0.3054, + "step": 9894, + "teacher_loss": 0.27456730604171753 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.28062915802001953, + "learning_rate": 2.850070624713019e-05, + "loss": 0.2643, + "step": 9895, + "teacher_loss": 0.2624356746673584 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.46978843212127686, + "learning_rate": 2.8499716298384094e-05, + "loss": 0.1822, + "step": 9896, + "teacher_loss": 0.15021324157714844 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.5566151738166809, + "learning_rate": 2.849872604012997e-05, + "loss": 0.2532, + "step": 9897, + "teacher_loss": 0.21945026516914368 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.41958922147750854, + "learning_rate": 2.8497735472390535e-05, + "loss": 0.2956, + "step": 9898, + "teacher_loss": 0.2818142771720886 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.6815909147262573, + "learning_rate": 2.849674459518849e-05, + "loss": 0.2954, + "step": 9899, + "teacher_loss": 0.2524435818195343 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.3325532078742981, + "learning_rate": 2.849575340854656e-05, + "loss": 0.2232, + "step": 9900, + "teacher_loss": 0.2110215276479721 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.04777495190501213, + "learning_rate": 2.8494761912487466e-05, + "loss": 0.1255, + "step": 9901, + "teacher_loss": 0.13408830761909485 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.2978960871696472, + "learning_rate": 2.8493770107033935e-05, + "loss": 0.195, + "step": 9902, + "teacher_loss": 0.18360914289951324 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.27499282360076904, + "learning_rate": 2.8492777992208722e-05, + "loss": 0.2622, + "step": 9903, + "teacher_loss": 0.26077011227607727 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.47969692945480347, + "learning_rate": 2.8491785568034558e-05, + "loss": 0.2405, + "step": 9904, + "teacher_loss": 0.2139766812324524 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 1.199562668800354, + "learning_rate": 2.84907928345342e-05, + "loss": 0.4675, + "step": 9905, + "teacher_loss": 0.38611388206481934 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.38313227891921997, + "learning_rate": 2.848979979173041e-05, + "loss": 0.235, + "step": 9906, + "teacher_loss": 0.2185504287481308 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.2430182695388794, + "learning_rate": 2.8488806439645957e-05, + "loss": 0.1943, + "step": 9907, + "teacher_loss": 0.1888800859451294 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.2562699317932129, + "learning_rate": 2.8487812778303615e-05, + "loss": 0.194, + "step": 9908, + "teacher_loss": 0.18711219727993011 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.5980132818222046, + "learning_rate": 2.8486818807726162e-05, + "loss": 0.2616, + "step": 9909, + "teacher_loss": 0.22427132725715637 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.2748788595199585, + "learning_rate": 2.8485824527936388e-05, + "loss": 0.2585, + "step": 9910, + "teacher_loss": 0.256646990776062 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.7118290662765503, + "learning_rate": 2.8484829938957094e-05, + "loss": 0.2347, + "step": 9911, + "teacher_loss": 0.18172098696231842 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.44738996028900146, + "learning_rate": 2.8483835040811076e-05, + "loss": 0.2368, + "step": 9912, + "teacher_loss": 0.21338830888271332 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.09399784356355667, + "learning_rate": 2.8482839833521147e-05, + "loss": 0.1697, + "step": 9913, + "teacher_loss": 0.1780831217765808 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.7724782228469849, + "learning_rate": 2.8481844317110127e-05, + "loss": 0.3442, + "step": 9914, + "teacher_loss": 0.2965834140777588 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.9141056537628174, + "learning_rate": 2.8480848491600838e-05, + "loss": 0.3963, + "step": 9915, + "teacher_loss": 0.3387743830680847 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.5922085046768188, + "learning_rate": 2.847985235701611e-05, + "loss": 0.2358, + "step": 9916, + "teacher_loss": 0.19620540738105774 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 1.0426573753356934, + "learning_rate": 2.847885591337878e-05, + "loss": 0.3734, + "step": 9917, + "teacher_loss": 0.2990170419216156 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.8257579803466797, + "learning_rate": 2.8477859160711696e-05, + "loss": 0.3603, + "step": 9918, + "teacher_loss": 0.3086114227771759 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.5918781757354736, + "learning_rate": 2.8476862099037712e-05, + "loss": 0.3419, + "step": 9919, + "teacher_loss": 0.3141464293003082 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.5281621217727661, + "learning_rate": 2.8475864728379682e-05, + "loss": 0.303, + "step": 9920, + "teacher_loss": 0.27793627977371216 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.3837212324142456, + "learning_rate": 2.8474867048760482e-05, + "loss": 0.3354, + "step": 9921, + "teacher_loss": 0.33006808161735535 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.5989212989807129, + "learning_rate": 2.8473869060202976e-05, + "loss": 0.4041, + "step": 9922, + "teacher_loss": 0.3824467062950134 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.38015252351760864, + "learning_rate": 2.847287076273005e-05, + "loss": 0.2438, + "step": 9923, + "teacher_loss": 0.22866132855415344 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.41769686341285706, + "learning_rate": 2.847187215636459e-05, + "loss": 0.2307, + "step": 9924, + "teacher_loss": 0.2099752426147461 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.9899097084999084, + "learning_rate": 2.8470873241129495e-05, + "loss": 0.3067, + "step": 9925, + "teacher_loss": 0.23078115284442902 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.6791456937789917, + "learning_rate": 2.8469874017047665e-05, + "loss": 0.2448, + "step": 9926, + "teacher_loss": 0.1965523064136505 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.36018267273902893, + "learning_rate": 2.846887448414201e-05, + "loss": 0.244, + "step": 9927, + "teacher_loss": 0.23109301924705505 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.4715564548969269, + "learning_rate": 2.846787464243544e-05, + "loss": 0.4214, + "step": 9928, + "teacher_loss": 0.41578084230422974 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.2985368072986603, + "learning_rate": 2.8466874491950887e-05, + "loss": 0.3594, + "step": 9929, + "teacher_loss": 0.3661215007305145 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.3729763925075531, + "learning_rate": 2.8465874032711278e-05, + "loss": 0.2684, + "step": 9930, + "teacher_loss": 0.2567366361618042 + }, + { + "compression_loss": 0.0, + "epoch": 1.79, + "label_loss": 0.573815107345581, + "learning_rate": 2.8464873264739543e-05, + "loss": 0.2992, + "step": 9931, + "teacher_loss": 0.26864123344421387 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.764059841632843, + "learning_rate": 2.8463872188058638e-05, + "loss": 0.33, + "step": 9932, + "teacher_loss": 0.28173232078552246 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.25906500220298767, + "learning_rate": 2.846287080269151e-05, + "loss": 0.2029, + "step": 9933, + "teacher_loss": 0.19660684466362 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.36125412583351135, + "learning_rate": 2.846186910866112e-05, + "loss": 0.2451, + "step": 9934, + "teacher_loss": 0.23220570385456085 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.7969882488250732, + "learning_rate": 2.8460867105990432e-05, + "loss": 0.4164, + "step": 9935, + "teacher_loss": 0.37408044934272766 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.5133858919143677, + "learning_rate": 2.845986479470242e-05, + "loss": 0.289, + "step": 9936, + "teacher_loss": 0.2640360891819 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.44553142786026, + "learning_rate": 2.8458862174820064e-05, + "loss": 0.3113, + "step": 9937, + "teacher_loss": 0.29635781049728394 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.6729985475540161, + "learning_rate": 2.8457859246366348e-05, + "loss": 0.3297, + "step": 9938, + "teacher_loss": 0.29154330492019653 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.4567737579345703, + "learning_rate": 2.845685600936427e-05, + "loss": 0.2038, + "step": 9939, + "teacher_loss": 0.1756627857685089 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.6258205771446228, + "learning_rate": 2.8455852463836826e-05, + "loss": 0.3584, + "step": 9940, + "teacher_loss": 0.32867884635925293 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.6175175905227661, + "learning_rate": 2.845484860980703e-05, + "loss": 0.2989, + "step": 9941, + "teacher_loss": 0.2635374069213867 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.6379503607749939, + "learning_rate": 2.845384444729789e-05, + "loss": 0.2831, + "step": 9942, + "teacher_loss": 0.24371957778930664 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.46774429082870483, + "learning_rate": 2.845283997633244e-05, + "loss": 0.3026, + "step": 9943, + "teacher_loss": 0.2841984033584595 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.296292245388031, + "learning_rate": 2.8451835196933703e-05, + "loss": 0.2022, + "step": 9944, + "teacher_loss": 0.19170412421226501 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.5297198295593262, + "learning_rate": 2.8450830109124712e-05, + "loss": 0.2266, + "step": 9945, + "teacher_loss": 0.192958801984787 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.3401559889316559, + "learning_rate": 2.8449824712928518e-05, + "loss": 0.2736, + "step": 9946, + "teacher_loss": 0.26623162627220154 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.15827253460884094, + "learning_rate": 2.8448819008368167e-05, + "loss": 0.2246, + "step": 9947, + "teacher_loss": 0.2319531887769699 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.28979456424713135, + "learning_rate": 2.8447812995466718e-05, + "loss": 0.2944, + "step": 9948, + "teacher_loss": 0.2948848009109497 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.3130207061767578, + "learning_rate": 2.844680667424723e-05, + "loss": 0.1846, + "step": 9949, + "teacher_loss": 0.17032350599765778 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.37660524249076843, + "learning_rate": 2.8445800044732787e-05, + "loss": 0.1886, + "step": 9950, + "teacher_loss": 0.1676691472530365 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.2756948471069336, + "learning_rate": 2.8444793106946458e-05, + "loss": 0.2418, + "step": 9951, + "teacher_loss": 0.23802588880062103 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.8937015533447266, + "learning_rate": 2.8443785860911337e-05, + "loss": 0.373, + "step": 9952, + "teacher_loss": 0.315145343542099 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.7640095353126526, + "learning_rate": 2.844277830665051e-05, + "loss": 0.2439, + "step": 9953, + "teacher_loss": 0.18606916069984436 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.1451510190963745, + "learning_rate": 2.844177044418708e-05, + "loss": 0.1885, + "step": 9954, + "teacher_loss": 0.19331926107406616 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.2929535508155823, + "learning_rate": 2.844076227354415e-05, + "loss": 0.2178, + "step": 9955, + "teacher_loss": 0.20941904187202454 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.6764476299285889, + "learning_rate": 2.8439753794744848e-05, + "loss": 0.3585, + "step": 9956, + "teacher_loss": 0.32313090562820435 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.6294410228729248, + "learning_rate": 2.843874500781228e-05, + "loss": 0.2763, + "step": 9957, + "teacher_loss": 0.23709014058113098 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.6707977652549744, + "learning_rate": 2.8437735912769578e-05, + "loss": 0.4574, + "step": 9958, + "teacher_loss": 0.43368589878082275 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.4132692813873291, + "learning_rate": 2.8436726509639883e-05, + "loss": 0.1976, + "step": 9959, + "teacher_loss": 0.1736908257007599 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.213484525680542, + "learning_rate": 2.8435716798446338e-05, + "loss": 0.2447, + "step": 9960, + "teacher_loss": 0.24814267456531525 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 1.1961371898651123, + "learning_rate": 2.8434706779212083e-05, + "loss": 0.4944, + "step": 9961, + "teacher_loss": 0.4164496064186096 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.40899306535720825, + "learning_rate": 2.843369645196028e-05, + "loss": 0.3176, + "step": 9962, + "teacher_loss": 0.3074972331523895 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.49438756704330444, + "learning_rate": 2.8432685816714103e-05, + "loss": 0.3754, + "step": 9963, + "teacher_loss": 0.3621580898761749 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.4733750820159912, + "learning_rate": 2.8431674873496706e-05, + "loss": 0.4121, + "step": 9964, + "teacher_loss": 0.4052943289279938 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.46345195174217224, + "learning_rate": 2.843066362233128e-05, + "loss": 0.2429, + "step": 9965, + "teacher_loss": 0.21841484308242798 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.626489520072937, + "learning_rate": 2.8429652063240996e-05, + "loss": 0.3139, + "step": 9966, + "teacher_loss": 0.2791442275047302 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.3096197843551636, + "learning_rate": 2.842864019624906e-05, + "loss": 0.2373, + "step": 9967, + "teacher_loss": 0.22926105558872223 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.6614104509353638, + "learning_rate": 2.8427628021378666e-05, + "loss": 0.249, + "step": 9968, + "teacher_loss": 0.20317423343658447 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.46194642782211304, + "learning_rate": 2.8426615538653016e-05, + "loss": 0.2883, + "step": 9969, + "teacher_loss": 0.2689683735370636 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.2057897299528122, + "learning_rate": 2.8425602748095328e-05, + "loss": 0.204, + "step": 9970, + "teacher_loss": 0.20376136898994446 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.41850346326828003, + "learning_rate": 2.8424589649728825e-05, + "loss": 0.2543, + "step": 9971, + "teacher_loss": 0.2361023724079132 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.339938759803772, + "learning_rate": 2.8423576243576726e-05, + "loss": 0.3325, + "step": 9972, + "teacher_loss": 0.3316519260406494 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.4035336971282959, + "learning_rate": 2.8422562529662273e-05, + "loss": 0.225, + "step": 9973, + "teacher_loss": 0.20515784621238708 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.44656214118003845, + "learning_rate": 2.84215485080087e-05, + "loss": 0.2922, + "step": 9974, + "teacher_loss": 0.27501797676086426 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.16691406071186066, + "learning_rate": 2.8420534178639265e-05, + "loss": 0.2176, + "step": 9975, + "teacher_loss": 0.22323036193847656 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.9612618684768677, + "learning_rate": 2.841951954157721e-05, + "loss": 0.4305, + "step": 9976, + "teacher_loss": 0.3715289235115051 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.6616100072860718, + "learning_rate": 2.841850459684581e-05, + "loss": 0.3198, + "step": 9977, + "teacher_loss": 0.2817884087562561 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.5083587169647217, + "learning_rate": 2.8417489344468334e-05, + "loss": 0.3753, + "step": 9978, + "teacher_loss": 0.3604700565338135 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.3334214687347412, + "learning_rate": 2.841647378446805e-05, + "loss": 0.2187, + "step": 9979, + "teacher_loss": 0.20590610802173615 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.41912841796875, + "learning_rate": 2.841545791686825e-05, + "loss": 0.2928, + "step": 9980, + "teacher_loss": 0.27873697876930237 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.40075385570526123, + "learning_rate": 2.8414441741692222e-05, + "loss": 0.2616, + "step": 9981, + "teacher_loss": 0.24614089727401733 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.5900130271911621, + "learning_rate": 2.841342525896326e-05, + "loss": 0.3473, + "step": 9982, + "teacher_loss": 0.3203285336494446 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.6284139752388, + "learning_rate": 2.8412408468704673e-05, + "loss": 0.3818, + "step": 9983, + "teacher_loss": 0.35445356369018555 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.47278058528900146, + "learning_rate": 2.8411391370939772e-05, + "loss": 0.3755, + "step": 9984, + "teacher_loss": 0.36466896533966064 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.2656656503677368, + "learning_rate": 2.841037396569188e-05, + "loss": 0.2093, + "step": 9985, + "teacher_loss": 0.20304420590400696 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.5919592976570129, + "learning_rate": 2.8409356252984315e-05, + "loss": 0.3223, + "step": 9986, + "teacher_loss": 0.2923741936683655 + }, + { + "compression_loss": 0.0, + "epoch": 1.8, + "label_loss": 0.5456598997116089, + "learning_rate": 2.8408338232840417e-05, + "loss": 0.2549, + "step": 9987, + "teacher_loss": 0.22263062000274658 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.3786388337612152, + "learning_rate": 2.8407319905283522e-05, + "loss": 0.2728, + "step": 9988, + "teacher_loss": 0.2610505223274231 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.759294867515564, + "learning_rate": 2.840630127033698e-05, + "loss": 0.2799, + "step": 9989, + "teacher_loss": 0.22659653425216675 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.22815167903900146, + "learning_rate": 2.8405282328024146e-05, + "loss": 0.1731, + "step": 9990, + "teacher_loss": 0.167032390832901 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.3255714774131775, + "learning_rate": 2.8404263078368377e-05, + "loss": 0.2168, + "step": 9991, + "teacher_loss": 0.20470184087753296 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.831087589263916, + "learning_rate": 2.8403243521393045e-05, + "loss": 0.4199, + "step": 9992, + "teacher_loss": 0.3742283284664154 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.4013445973396301, + "learning_rate": 2.840222365712152e-05, + "loss": 0.2635, + "step": 9993, + "teacher_loss": 0.24819760024547577 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.540683388710022, + "learning_rate": 2.8401203485577192e-05, + "loss": 0.282, + "step": 9994, + "teacher_loss": 0.2532673478126526 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.25122275948524475, + "learning_rate": 2.8400183006783446e-05, + "loss": 0.223, + "step": 9995, + "teacher_loss": 0.21985140442848206 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.4782525599002838, + "learning_rate": 2.8399162220763678e-05, + "loss": 0.3334, + "step": 9996, + "teacher_loss": 0.31726616621017456 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.8623383045196533, + "learning_rate": 2.83981411275413e-05, + "loss": 0.64, + "step": 9997, + "teacher_loss": 0.6152902841567993 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.44521552324295044, + "learning_rate": 2.8397119727139708e-05, + "loss": 0.2733, + "step": 9998, + "teacher_loss": 0.25417762994766235 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.423713743686676, + "learning_rate": 2.8396098019582333e-05, + "loss": 0.3331, + "step": 9999, + "teacher_loss": 0.32298362255096436 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.4213792085647583, + "learning_rate": 2.839507600489259e-05, + "loss": 0.2469, + "step": 10000, + "teacher_loss": 0.22746586799621582 + }, + { + "epoch": 1.81, + "eval_exact_match": 79.48912015137181, + "eval_f1": 87.01614426531715, + "step": 10000 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.39092931151390076, + "learning_rate": 2.8394053683093916e-05, + "loss": 0.2119, + "step": 10001, + "teacher_loss": 0.19196656346321106 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.5520212650299072, + "learning_rate": 2.839303105420975e-05, + "loss": 0.399, + "step": 10002, + "teacher_loss": 0.38202935457229614 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.28513625264167786, + "learning_rate": 2.8392008118263533e-05, + "loss": 0.2307, + "step": 10003, + "teacher_loss": 0.22462332248687744 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.3025251626968384, + "learning_rate": 2.8390984875278724e-05, + "loss": 0.2268, + "step": 10004, + "teacher_loss": 0.21842685341835022 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.6585941910743713, + "learning_rate": 2.8389961325278776e-05, + "loss": 0.3615, + "step": 10005, + "teacher_loss": 0.3284519910812378 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.26177719235420227, + "learning_rate": 2.8388937468287166e-05, + "loss": 0.1783, + "step": 10006, + "teacher_loss": 0.16898679733276367 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 1.025555968284607, + "learning_rate": 2.8387913304327356e-05, + "loss": 0.454, + "step": 10007, + "teacher_loss": 0.3905356526374817 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.45299583673477173, + "learning_rate": 2.8386888833422833e-05, + "loss": 0.2697, + "step": 10008, + "teacher_loss": 0.24938833713531494 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.4554581642150879, + "learning_rate": 2.838586405559709e-05, + "loss": 0.2899, + "step": 10009, + "teacher_loss": 0.271531879901886 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.2757774591445923, + "learning_rate": 2.8384838970873613e-05, + "loss": 0.176, + "step": 10010, + "teacher_loss": 0.1649159938097 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.22103755176067352, + "learning_rate": 2.8383813579275912e-05, + "loss": 0.2237, + "step": 10011, + "teacher_loss": 0.22398334741592407 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.6249642372131348, + "learning_rate": 2.8382787880827488e-05, + "loss": 0.3466, + "step": 10012, + "teacher_loss": 0.3157137632369995 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.5051115155220032, + "learning_rate": 2.8381761875551865e-05, + "loss": 0.2602, + "step": 10013, + "teacher_loss": 0.2329789251089096 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.46477973461151123, + "learning_rate": 2.8380735563472564e-05, + "loss": 0.4664, + "step": 10014, + "teacher_loss": 0.466605007648468 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.4993470311164856, + "learning_rate": 2.8379708944613112e-05, + "loss": 0.2636, + "step": 10015, + "teacher_loss": 0.23744003474712372 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.7189319133758545, + "learning_rate": 2.8378682018997046e-05, + "loss": 0.2464, + "step": 10016, + "teacher_loss": 0.19386765360832214 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.5674004554748535, + "learning_rate": 2.8377654786647916e-05, + "loss": 0.3371, + "step": 10017, + "teacher_loss": 0.3115500211715698 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.2641545534133911, + "learning_rate": 2.8376627247589268e-05, + "loss": 0.2601, + "step": 10018, + "teacher_loss": 0.2596549391746521 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.3129035532474518, + "learning_rate": 2.8375599401844665e-05, + "loss": 0.172, + "step": 10019, + "teacher_loss": 0.15633898973464966 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.18933723866939545, + "learning_rate": 2.8374571249437666e-05, + "loss": 0.2484, + "step": 10020, + "teacher_loss": 0.2549378275871277 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.18536841869354248, + "learning_rate": 2.837354279039185e-05, + "loss": 0.2221, + "step": 10021, + "teacher_loss": 0.22623273730278015 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.2101885974407196, + "learning_rate": 2.8372514024730792e-05, + "loss": 0.2298, + "step": 10022, + "teacher_loss": 0.23194262385368347 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.48914965987205505, + "learning_rate": 2.837148495247808e-05, + "loss": 0.3205, + "step": 10023, + "teacher_loss": 0.3017808794975281 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.26564300060272217, + "learning_rate": 2.837045557365731e-05, + "loss": 0.3501, + "step": 10024, + "teacher_loss": 0.35946038365364075 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.5714926719665527, + "learning_rate": 2.836942588829208e-05, + "loss": 0.2595, + "step": 10025, + "teacher_loss": 0.22483046352863312 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.7653529644012451, + "learning_rate": 2.8368395896405997e-05, + "loss": 0.2967, + "step": 10026, + "teacher_loss": 0.24461224675178528 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.6550235152244568, + "learning_rate": 2.836736559802268e-05, + "loss": 0.3184, + "step": 10027, + "teacher_loss": 0.2809876799583435 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.5254228115081787, + "learning_rate": 2.8366334993165744e-05, + "loss": 0.3109, + "step": 10028, + "teacher_loss": 0.2870434522628784 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.4441121816635132, + "learning_rate": 2.836530408185882e-05, + "loss": 0.2124, + "step": 10029, + "teacher_loss": 0.18661056458950043 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.29186293482780457, + "learning_rate": 2.836427286412555e-05, + "loss": 0.3306, + "step": 10030, + "teacher_loss": 0.33490413427352905 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.22067967057228088, + "learning_rate": 2.8363241339989567e-05, + "loss": 0.2584, + "step": 10031, + "teacher_loss": 0.26260995864868164 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.44406452775001526, + "learning_rate": 2.8362209509474525e-05, + "loss": 0.3025, + "step": 10032, + "teacher_loss": 0.28674912452697754 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.721721351146698, + "learning_rate": 2.836117737260408e-05, + "loss": 0.2223, + "step": 10033, + "teacher_loss": 0.16681170463562012 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.2818625271320343, + "learning_rate": 2.83601449294019e-05, + "loss": 0.2368, + "step": 10034, + "teacher_loss": 0.23182731866836548 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.7916396856307983, + "learning_rate": 2.835911217989165e-05, + "loss": 0.2997, + "step": 10035, + "teacher_loss": 0.2450808584690094 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.3018155097961426, + "learning_rate": 2.8358079124097013e-05, + "loss": 0.2592, + "step": 10036, + "teacher_loss": 0.25441431999206543 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.35869908332824707, + "learning_rate": 2.835704576204167e-05, + "loss": 0.3396, + "step": 10037, + "teacher_loss": 0.3374679684638977 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.20626285672187805, + "learning_rate": 2.8356012093749312e-05, + "loss": 0.2218, + "step": 10038, + "teacher_loss": 0.2235313355922699 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.7450047731399536, + "learning_rate": 2.8354978119243643e-05, + "loss": 0.2842, + "step": 10039, + "teacher_loss": 0.2330470085144043 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.2529345452785492, + "learning_rate": 2.8353943838548365e-05, + "loss": 0.3474, + "step": 10040, + "teacher_loss": 0.35788941383361816 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.475966215133667, + "learning_rate": 2.8352909251687193e-05, + "loss": 0.4137, + "step": 10041, + "teacher_loss": 0.40676021575927734 + }, + { + "compression_loss": 0.0, + "epoch": 1.81, + "label_loss": 0.7100175619125366, + "learning_rate": 2.8351874358683844e-05, + "loss": 0.4265, + "step": 10042, + "teacher_loss": 0.39495301246643066 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.6449384689331055, + "learning_rate": 2.8350839159562047e-05, + "loss": 0.3245, + "step": 10043, + "teacher_loss": 0.28892967104911804 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.22249314188957214, + "learning_rate": 2.8349803654345538e-05, + "loss": 0.1712, + "step": 10044, + "teacher_loss": 0.1654951572418213 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.5362383127212524, + "learning_rate": 2.8348767843058054e-05, + "loss": 0.2871, + "step": 10045, + "teacher_loss": 0.25945594906806946 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.21562406420707703, + "learning_rate": 2.8347731725723346e-05, + "loss": 0.1791, + "step": 10046, + "teacher_loss": 0.1750330626964569 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.642021119594574, + "learning_rate": 2.8346695302365165e-05, + "loss": 0.2823, + "step": 10047, + "teacher_loss": 0.2423771172761917 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.35786062479019165, + "learning_rate": 2.8345658573007286e-05, + "loss": 0.1922, + "step": 10048, + "teacher_loss": 0.1737847924232483 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.4084697365760803, + "learning_rate": 2.834462153767346e-05, + "loss": 0.2245, + "step": 10049, + "teacher_loss": 0.20402434468269348 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.18152669072151184, + "learning_rate": 2.834358419638747e-05, + "loss": 0.2266, + "step": 10050, + "teacher_loss": 0.23164907097816467 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.8116253018379211, + "learning_rate": 2.8342546549173104e-05, + "loss": 0.2668, + "step": 10051, + "teacher_loss": 0.20628829300403595 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.3889818489551544, + "learning_rate": 2.834150859605415e-05, + "loss": 0.2248, + "step": 10052, + "teacher_loss": 0.20655973255634308 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.19807803630828857, + "learning_rate": 2.8340470337054402e-05, + "loss": 0.2144, + "step": 10053, + "teacher_loss": 0.2161865532398224 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.4396580457687378, + "learning_rate": 2.8339431772197668e-05, + "loss": 0.1799, + "step": 10054, + "teacher_loss": 0.15105697512626648 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.17564627528190613, + "learning_rate": 2.833839290150775e-05, + "loss": 0.1805, + "step": 10055, + "teacher_loss": 0.18101385235786438 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.26237696409225464, + "learning_rate": 2.8337353725008482e-05, + "loss": 0.2522, + "step": 10056, + "teacher_loss": 0.2510750889778137 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.48744410276412964, + "learning_rate": 2.8336314242723674e-05, + "loss": 0.3148, + "step": 10057, + "teacher_loss": 0.2956140339374542 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.25666099786758423, + "learning_rate": 2.8335274454677168e-05, + "loss": 0.2218, + "step": 10058, + "teacher_loss": 0.21788930892944336 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.7666101455688477, + "learning_rate": 2.8334234360892797e-05, + "loss": 0.3207, + "step": 10059, + "teacher_loss": 0.2711900472640991 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.7123481035232544, + "learning_rate": 2.8333193961394415e-05, + "loss": 0.2505, + "step": 10060, + "teacher_loss": 0.19918644428253174 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.4849434494972229, + "learning_rate": 2.833215325620587e-05, + "loss": 0.2957, + "step": 10061, + "teacher_loss": 0.2747054398059845 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.36107003688812256, + "learning_rate": 2.8331112245351018e-05, + "loss": 0.3197, + "step": 10062, + "teacher_loss": 0.3150624632835388 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.34174734354019165, + "learning_rate": 2.8330070928853734e-05, + "loss": 0.2148, + "step": 10063, + "teacher_loss": 0.20071497559547424 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.36111021041870117, + "learning_rate": 2.832902930673789e-05, + "loss": 0.2706, + "step": 10064, + "teacher_loss": 0.2605125308036804 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.30980220437049866, + "learning_rate": 2.8327987379027364e-05, + "loss": 0.2287, + "step": 10065, + "teacher_loss": 0.2196970283985138 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.29366597533226013, + "learning_rate": 2.8326945145746053e-05, + "loss": 0.2184, + "step": 10066, + "teacher_loss": 0.21008357405662537 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.7954883575439453, + "learning_rate": 2.832590260691784e-05, + "loss": 0.2903, + "step": 10067, + "teacher_loss": 0.23414069414138794 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.6837584972381592, + "learning_rate": 2.8324859762566634e-05, + "loss": 0.3752, + "step": 10068, + "teacher_loss": 0.3409465551376343 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.2502906620502472, + "learning_rate": 2.8323816612716345e-05, + "loss": 0.2311, + "step": 10069, + "teacher_loss": 0.22891896963119507 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.9472180604934692, + "learning_rate": 2.8322773157390887e-05, + "loss": 0.4705, + "step": 10070, + "teacher_loss": 0.41748660802841187 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.4407423138618469, + "learning_rate": 2.8321729396614185e-05, + "loss": 0.3442, + "step": 10071, + "teacher_loss": 0.333477258682251 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.46047964692115784, + "learning_rate": 2.832068533041017e-05, + "loss": 0.2319, + "step": 10072, + "teacher_loss": 0.20654237270355225 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.38682061433792114, + "learning_rate": 2.831964095880277e-05, + "loss": 0.3607, + "step": 10073, + "teacher_loss": 0.35777562856674194 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.5228363275527954, + "learning_rate": 2.8318596281815948e-05, + "loss": 0.2657, + "step": 10074, + "teacher_loss": 0.23716843128204346 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.2322489619255066, + "learning_rate": 2.831755129947364e-05, + "loss": 0.1683, + "step": 10075, + "teacher_loss": 0.16121214628219604 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.38180452585220337, + "learning_rate": 2.8316506011799808e-05, + "loss": 0.2207, + "step": 10076, + "teacher_loss": 0.2028535008430481 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.6159590482711792, + "learning_rate": 2.8315460418818416e-05, + "loss": 0.3025, + "step": 10077, + "teacher_loss": 0.26765990257263184 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.5178865194320679, + "learning_rate": 2.831441452055344e-05, + "loss": 0.3505, + "step": 10078, + "teacher_loss": 0.33192354440689087 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.7684535384178162, + "learning_rate": 2.8313368317028862e-05, + "loss": 0.2792, + "step": 10079, + "teacher_loss": 0.22478681802749634 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.5380355715751648, + "learning_rate": 2.831232180826866e-05, + "loss": 0.3462, + "step": 10080, + "teacher_loss": 0.3248831629753113 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.5595866441726685, + "learning_rate": 2.8311274994296835e-05, + "loss": 0.3168, + "step": 10081, + "teacher_loss": 0.28977569937705994 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.6758938431739807, + "learning_rate": 2.831022787513738e-05, + "loss": 0.3626, + "step": 10082, + "teacher_loss": 0.3278322219848633 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.6684085130691528, + "learning_rate": 2.8309180450814304e-05, + "loss": 0.3742, + "step": 10083, + "teacher_loss": 0.3415384292602539 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.7728433609008789, + "learning_rate": 2.830813272135163e-05, + "loss": 0.3425, + "step": 10084, + "teacher_loss": 0.29471856355667114 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.4471004605293274, + "learning_rate": 2.8307084686773367e-05, + "loss": 0.265, + "step": 10085, + "teacher_loss": 0.24478890001773834 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.3530995547771454, + "learning_rate": 2.830603634710355e-05, + "loss": 0.2421, + "step": 10086, + "teacher_loss": 0.2297591269016266 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.28494173288345337, + "learning_rate": 2.8304987702366214e-05, + "loss": 0.217, + "step": 10087, + "teacher_loss": 0.20944523811340332 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.6312648057937622, + "learning_rate": 2.83039387525854e-05, + "loss": 0.5607, + "step": 10088, + "teacher_loss": 0.5528750419616699 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.28036999702453613, + "learning_rate": 2.8302889497785156e-05, + "loss": 0.2622, + "step": 10089, + "teacher_loss": 0.26023340225219727 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.3508296012878418, + "learning_rate": 2.830183993798954e-05, + "loss": 0.2376, + "step": 10090, + "teacher_loss": 0.225011944770813 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.6957626342773438, + "learning_rate": 2.830079007322262e-05, + "loss": 0.3616, + "step": 10091, + "teacher_loss": 0.32449856400489807 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.3001045882701874, + "learning_rate": 2.829973990350846e-05, + "loss": 0.2415, + "step": 10092, + "teacher_loss": 0.23493772745132446 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.5480846166610718, + "learning_rate": 2.8298689428871135e-05, + "loss": 0.3804, + "step": 10093, + "teacher_loss": 0.3617645502090454 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.3611062169075012, + "learning_rate": 2.829763864933473e-05, + "loss": 0.276, + "step": 10094, + "teacher_loss": 0.26650309562683105 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.4307994246482849, + "learning_rate": 2.8296587564923346e-05, + "loss": 0.1753, + "step": 10095, + "teacher_loss": 0.14691662788391113 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.25874757766723633, + "learning_rate": 2.8295536175661073e-05, + "loss": 0.3003, + "step": 10096, + "teacher_loss": 0.30494174361228943 + }, + { + "compression_loss": 0.0, + "epoch": 1.82, + "label_loss": 0.15567296743392944, + "learning_rate": 2.8294484481572018e-05, + "loss": 0.1981, + "step": 10097, + "teacher_loss": 0.20278194546699524 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.40125876665115356, + "learning_rate": 2.8293432482680292e-05, + "loss": 0.2449, + "step": 10098, + "teacher_loss": 0.2275681346654892 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.25205883383750916, + "learning_rate": 2.8292380179010014e-05, + "loss": 0.292, + "step": 10099, + "teacher_loss": 0.29645413160324097 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.4218711256980896, + "learning_rate": 2.8291327570585312e-05, + "loss": 0.2594, + "step": 10100, + "teacher_loss": 0.2413722574710846 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.41986334323883057, + "learning_rate": 2.829027465743032e-05, + "loss": 0.4004, + "step": 10101, + "teacher_loss": 0.3982834219932556 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.23255914449691772, + "learning_rate": 2.8289221439569172e-05, + "loss": 0.2781, + "step": 10102, + "teacher_loss": 0.28320854902267456 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.7649527788162231, + "learning_rate": 2.8288167917026022e-05, + "loss": 0.3654, + "step": 10103, + "teacher_loss": 0.32100915908813477 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.559751033782959, + "learning_rate": 2.8287114089825022e-05, + "loss": 0.3137, + "step": 10104, + "teacher_loss": 0.2863689363002777 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.7335920333862305, + "learning_rate": 2.8286059957990334e-05, + "loss": 0.5446, + "step": 10105, + "teacher_loss": 0.5235691070556641 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.4732711911201477, + "learning_rate": 2.8285005521546122e-05, + "loss": 0.2097, + "step": 10106, + "teacher_loss": 0.18043681979179382 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.8340111970901489, + "learning_rate": 2.8283950780516567e-05, + "loss": 0.3805, + "step": 10107, + "teacher_loss": 0.33015868067741394 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.16369491815567017, + "learning_rate": 2.8282895734925846e-05, + "loss": 0.3181, + "step": 10108, + "teacher_loss": 0.3352566063404083 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.2734563946723938, + "learning_rate": 2.8281840384798147e-05, + "loss": 0.24, + "step": 10109, + "teacher_loss": 0.23631922900676727 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.583782434463501, + "learning_rate": 2.8280784730157676e-05, + "loss": 0.2547, + "step": 10110, + "teacher_loss": 0.21817341446876526 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.30771467089653015, + "learning_rate": 2.8279728771028623e-05, + "loss": 0.2511, + "step": 10111, + "teacher_loss": 0.24480858445167542 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.3733453154563904, + "learning_rate": 2.8278672507435207e-05, + "loss": 0.2883, + "step": 10112, + "teacher_loss": 0.2788010835647583 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.503891110420227, + "learning_rate": 2.827761593940164e-05, + "loss": 0.2786, + "step": 10113, + "teacher_loss": 0.2535475194454193 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.15241241455078125, + "learning_rate": 2.827655906695215e-05, + "loss": 0.1786, + "step": 10114, + "teacher_loss": 0.18150311708450317 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.2417047917842865, + "learning_rate": 2.8275501890110966e-05, + "loss": 0.2329, + "step": 10115, + "teacher_loss": 0.23192723095417023 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.8406541347503662, + "learning_rate": 2.827444440890232e-05, + "loss": 0.352, + "step": 10116, + "teacher_loss": 0.2977423667907715 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.5134698748588562, + "learning_rate": 2.827338662335047e-05, + "loss": 0.1835, + "step": 10117, + "teacher_loss": 0.14684215188026428 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.49457627534866333, + "learning_rate": 2.827232853347966e-05, + "loss": 0.2637, + "step": 10118, + "teacher_loss": 0.23808389902114868 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.2242611050605774, + "learning_rate": 2.8271270139314144e-05, + "loss": 0.1725, + "step": 10119, + "teacher_loss": 0.1667870730161667 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.5023941397666931, + "learning_rate": 2.82702114408782e-05, + "loss": 0.2196, + "step": 10120, + "teacher_loss": 0.18816867470741272 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.6886653900146484, + "learning_rate": 2.8269152438196086e-05, + "loss": 0.2976, + "step": 10121, + "teacher_loss": 0.25413933396339417 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.4681745767593384, + "learning_rate": 2.826809313129209e-05, + "loss": 0.2122, + "step": 10122, + "teacher_loss": 0.18379315733909607 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.31001555919647217, + "learning_rate": 2.82670335201905e-05, + "loss": 0.2282, + "step": 10123, + "teacher_loss": 0.2190977931022644 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.6931607127189636, + "learning_rate": 2.8265973604915613e-05, + "loss": 0.3191, + "step": 10124, + "teacher_loss": 0.27755096554756165 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.7336114645004272, + "learning_rate": 2.826491338549172e-05, + "loss": 0.2732, + "step": 10125, + "teacher_loss": 0.22206394374370575 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.487165629863739, + "learning_rate": 2.8263852861943134e-05, + "loss": 0.26, + "step": 10126, + "teacher_loss": 0.2347734570503235 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.4697193503379822, + "learning_rate": 2.826279203429417e-05, + "loss": 0.2655, + "step": 10127, + "teacher_loss": 0.24282614886760712 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.4233858585357666, + "learning_rate": 2.8261730902569146e-05, + "loss": 0.4074, + "step": 10128, + "teacher_loss": 0.4056728482246399 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.19034206867218018, + "learning_rate": 2.8260669466792394e-05, + "loss": 0.2043, + "step": 10129, + "teacher_loss": 0.20580735802650452 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.39717885851860046, + "learning_rate": 2.8259607726988252e-05, + "loss": 0.2395, + "step": 10130, + "teacher_loss": 0.22195212543010712 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.5770330429077148, + "learning_rate": 2.8258545683181058e-05, + "loss": 0.3568, + "step": 10131, + "teacher_loss": 0.3323458433151245 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.47333037853240967, + "learning_rate": 2.825748333539516e-05, + "loss": 0.2968, + "step": 10132, + "teacher_loss": 0.27721142768859863 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.26156559586524963, + "learning_rate": 2.825642068365492e-05, + "loss": 0.2922, + "step": 10133, + "teacher_loss": 0.29555946588516235 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.7056933641433716, + "learning_rate": 2.82553577279847e-05, + "loss": 0.3848, + "step": 10134, + "teacher_loss": 0.34914886951446533 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.3210175335407257, + "learning_rate": 2.8254294468408862e-05, + "loss": 0.2311, + "step": 10135, + "teacher_loss": 0.22111928462982178 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 1.003559947013855, + "learning_rate": 2.8253230904951794e-05, + "loss": 0.3414, + "step": 10136, + "teacher_loss": 0.2678506374359131 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.3333669900894165, + "learning_rate": 2.825216703763788e-05, + "loss": 0.2808, + "step": 10137, + "teacher_loss": 0.274944931268692 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.32442617416381836, + "learning_rate": 2.8251102866491507e-05, + "loss": 0.2413, + "step": 10138, + "teacher_loss": 0.23203572630882263 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.22870692610740662, + "learning_rate": 2.825003839153707e-05, + "loss": 0.1696, + "step": 10139, + "teacher_loss": 0.16301561892032623 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.4195851683616638, + "learning_rate": 2.8248973612798975e-05, + "loss": 0.256, + "step": 10140, + "teacher_loss": 0.23787939548492432 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.20134034752845764, + "learning_rate": 2.8247908530301646e-05, + "loss": 0.2581, + "step": 10141, + "teacher_loss": 0.26441875100135803 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 1.0713342428207397, + "learning_rate": 2.824684314406949e-05, + "loss": 0.3513, + "step": 10142, + "teacher_loss": 0.2712668776512146 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.32342541217803955, + "learning_rate": 2.8245777454126937e-05, + "loss": 0.2652, + "step": 10143, + "teacher_loss": 0.25872209668159485 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.3959038555622101, + "learning_rate": 2.824471146049842e-05, + "loss": 0.3081, + "step": 10144, + "teacher_loss": 0.29834097623825073 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.26506179571151733, + "learning_rate": 2.824364516320838e-05, + "loss": 0.2216, + "step": 10145, + "teacher_loss": 0.2167806178331375 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.36636945605278015, + "learning_rate": 2.8242578562281266e-05, + "loss": 0.2682, + "step": 10146, + "teacher_loss": 0.2573147416114807 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.1470733880996704, + "learning_rate": 2.8241511657741525e-05, + "loss": 0.2031, + "step": 10147, + "teacher_loss": 0.2093411535024643 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.6461858749389648, + "learning_rate": 2.824044444961362e-05, + "loss": 0.2538, + "step": 10148, + "teacher_loss": 0.2102227807044983 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.4144626557826996, + "learning_rate": 2.8239376937922022e-05, + "loss": 0.2305, + "step": 10149, + "teacher_loss": 0.21000996232032776 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.26975417137145996, + "learning_rate": 2.8238309122691206e-05, + "loss": 0.1744, + "step": 10150, + "teacher_loss": 0.16384296119213104 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.44550907611846924, + "learning_rate": 2.823724100394565e-05, + "loss": 0.3536, + "step": 10151, + "teacher_loss": 0.343362957239151 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.6039840579032898, + "learning_rate": 2.8236172581709844e-05, + "loss": 0.2742, + "step": 10152, + "teacher_loss": 0.23760539293289185 + }, + { + "compression_loss": 0.0, + "epoch": 1.83, + "label_loss": 0.5013165473937988, + "learning_rate": 2.823510385600829e-05, + "loss": 0.3639, + "step": 10153, + "teacher_loss": 0.3486413061618805 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.33236414194107056, + "learning_rate": 2.823403482686548e-05, + "loss": 0.2587, + "step": 10154, + "teacher_loss": 0.25050631165504456 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.20856590569019318, + "learning_rate": 2.8232965494305934e-05, + "loss": 0.2025, + "step": 10155, + "teacher_loss": 0.2018308937549591 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.27331018447875977, + "learning_rate": 2.8231895858354162e-05, + "loss": 0.2814, + "step": 10156, + "teacher_loss": 0.282331258058548 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.3737063407897949, + "learning_rate": 2.8230825919034687e-05, + "loss": 0.2258, + "step": 10157, + "teacher_loss": 0.2093178927898407 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.971958339214325, + "learning_rate": 2.8229755676372043e-05, + "loss": 0.3436, + "step": 10158, + "teacher_loss": 0.27379894256591797 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.3210369348526001, + "learning_rate": 2.822868513039077e-05, + "loss": 0.1905, + "step": 10159, + "teacher_loss": 0.17594194412231445 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.5199267864227295, + "learning_rate": 2.8227614281115404e-05, + "loss": 0.3311, + "step": 10160, + "teacher_loss": 0.31009846925735474 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.6118385791778564, + "learning_rate": 2.82265431285705e-05, + "loss": 0.3156, + "step": 10161, + "teacher_loss": 0.2827160954475403 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.43781721591949463, + "learning_rate": 2.822547167278062e-05, + "loss": 0.2694, + "step": 10162, + "teacher_loss": 0.25066712498664856 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.7488399744033813, + "learning_rate": 2.822439991377033e-05, + "loss": 0.3808, + "step": 10163, + "teacher_loss": 0.3399509787559509 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.2312871217727661, + "learning_rate": 2.8223327851564193e-05, + "loss": 0.2556, + "step": 10164, + "teacher_loss": 0.2583398222923279 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.2981010377407074, + "learning_rate": 2.8222255486186798e-05, + "loss": 0.2966, + "step": 10165, + "teacher_loss": 0.29647552967071533 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.510927677154541, + "learning_rate": 2.822118281766272e-05, + "loss": 0.3766, + "step": 10166, + "teacher_loss": 0.36167585849761963 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.4946654736995697, + "learning_rate": 2.8220109846016568e-05, + "loss": 0.2047, + "step": 10167, + "teacher_loss": 0.17251738905906677 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.21726711094379425, + "learning_rate": 2.821903657127293e-05, + "loss": 0.1878, + "step": 10168, + "teacher_loss": 0.1845286786556244 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.446840763092041, + "learning_rate": 2.8217962993456415e-05, + "loss": 0.2738, + "step": 10169, + "teacher_loss": 0.2545472979545593 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.3201420307159424, + "learning_rate": 2.8216889112591635e-05, + "loss": 0.2497, + "step": 10170, + "teacher_loss": 0.24187727272510529 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.6139057874679565, + "learning_rate": 2.821581492870322e-05, + "loss": 0.3174, + "step": 10171, + "teacher_loss": 0.28450679779052734 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.6112133264541626, + "learning_rate": 2.821474044181579e-05, + "loss": 0.2902, + "step": 10172, + "teacher_loss": 0.25452956557273865 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.5521390438079834, + "learning_rate": 2.8213665651953977e-05, + "loss": 0.4799, + "step": 10173, + "teacher_loss": 0.4718799591064453 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.31955623626708984, + "learning_rate": 2.821259055914243e-05, + "loss": 0.2485, + "step": 10174, + "teacher_loss": 0.24061307311058044 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.4162052869796753, + "learning_rate": 2.8211515163405798e-05, + "loss": 0.2387, + "step": 10175, + "teacher_loss": 0.21893858909606934 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.6645916700363159, + "learning_rate": 2.8210439464768733e-05, + "loss": 0.3088, + "step": 10176, + "teacher_loss": 0.2692273259162903 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.6467615365982056, + "learning_rate": 2.82093634632559e-05, + "loss": 0.3555, + "step": 10177, + "teacher_loss": 0.323160320520401 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.39171579480171204, + "learning_rate": 2.8208287158891956e-05, + "loss": 0.3172, + "step": 10178, + "teacher_loss": 0.308951199054718 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.37491148710250854, + "learning_rate": 2.82072105517016e-05, + "loss": 0.2971, + "step": 10179, + "teacher_loss": 0.28850528597831726 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.45946401357650757, + "learning_rate": 2.82061336417095e-05, + "loss": 0.3302, + "step": 10180, + "teacher_loss": 0.3158247768878937 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.5471202731132507, + "learning_rate": 2.8205056428940342e-05, + "loss": 0.337, + "step": 10181, + "teacher_loss": 0.3136206269264221 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.5714937448501587, + "learning_rate": 2.820397891341884e-05, + "loss": 0.4733, + "step": 10182, + "teacher_loss": 0.46238064765930176 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.4085143804550171, + "learning_rate": 2.8202901095169684e-05, + "loss": 0.235, + "step": 10183, + "teacher_loss": 0.21576423943042755 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 1.5349762439727783, + "learning_rate": 2.8201822974217592e-05, + "loss": 0.4565, + "step": 10184, + "teacher_loss": 0.33662736415863037 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.32591256499290466, + "learning_rate": 2.820074455058728e-05, + "loss": 0.2639, + "step": 10185, + "teacher_loss": 0.2570186257362366 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.67108154296875, + "learning_rate": 2.8199665824303473e-05, + "loss": 0.4043, + "step": 10186, + "teacher_loss": 0.37470734119415283 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.3161235749721527, + "learning_rate": 2.8198586795390903e-05, + "loss": 0.3299, + "step": 10187, + "teacher_loss": 0.33146941661834717 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.3146645426750183, + "learning_rate": 2.8197507463874312e-05, + "loss": 0.1895, + "step": 10188, + "teacher_loss": 0.1756211519241333 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.3265650272369385, + "learning_rate": 2.819642782977844e-05, + "loss": 0.2113, + "step": 10189, + "teacher_loss": 0.19849266111850739 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.29414069652557373, + "learning_rate": 2.8195347893128046e-05, + "loss": 0.1848, + "step": 10190, + "teacher_loss": 0.17267151176929474 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.7510318756103516, + "learning_rate": 2.8194267653947886e-05, + "loss": 0.273, + "step": 10191, + "teacher_loss": 0.21990731358528137 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.5448933839797974, + "learning_rate": 2.8193187112262725e-05, + "loss": 0.3282, + "step": 10192, + "teacher_loss": 0.30417752265930176 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.36576300859451294, + "learning_rate": 2.8192106268097336e-05, + "loss": 0.2133, + "step": 10193, + "teacher_loss": 0.19635042548179626 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.08481509983539581, + "learning_rate": 2.8191025121476505e-05, + "loss": 0.1847, + "step": 10194, + "teacher_loss": 0.1957492232322693 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.22363883256912231, + "learning_rate": 2.818994367242502e-05, + "loss": 0.1963, + "step": 10195, + "teacher_loss": 0.19323772192001343 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.7947820425033569, + "learning_rate": 2.818886192096767e-05, + "loss": 0.2681, + "step": 10196, + "teacher_loss": 0.20955385267734528 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.3694137930870056, + "learning_rate": 2.8187779867129255e-05, + "loss": 0.1636, + "step": 10197, + "teacher_loss": 0.14070913195610046 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.25417378544807434, + "learning_rate": 2.818669751093459e-05, + "loss": 0.2793, + "step": 10198, + "teacher_loss": 0.28213492035865784 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.5340924263000488, + "learning_rate": 2.8185614852408488e-05, + "loss": 0.2909, + "step": 10199, + "teacher_loss": 0.26389771699905396 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.6272094249725342, + "learning_rate": 2.8184531891575766e-05, + "loss": 0.3619, + "step": 10200, + "teacher_loss": 0.3324025273323059 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.22327786684036255, + "learning_rate": 2.8183448628461262e-05, + "loss": 0.2749, + "step": 10201, + "teacher_loss": 0.28066080808639526 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.7067087888717651, + "learning_rate": 2.8182365063089803e-05, + "loss": 0.409, + "step": 10202, + "teacher_loss": 0.37595629692077637 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.21253371238708496, + "learning_rate": 2.8181281195486238e-05, + "loss": 0.2261, + "step": 10203, + "teacher_loss": 0.22765298187732697 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.6708394289016724, + "learning_rate": 2.8180197025675412e-05, + "loss": 0.5265, + "step": 10204, + "teacher_loss": 0.510500431060791 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.44892236590385437, + "learning_rate": 2.817911255368219e-05, + "loss": 0.2643, + "step": 10205, + "teacher_loss": 0.24373331665992737 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.7344803214073181, + "learning_rate": 2.8178027779531422e-05, + "loss": 0.3394, + "step": 10206, + "teacher_loss": 0.29550835490226746 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.3964819014072418, + "learning_rate": 2.8176942703247993e-05, + "loss": 0.2015, + "step": 10207, + "teacher_loss": 0.1797998994588852 + }, + { + "compression_loss": 0.0, + "epoch": 1.84, + "label_loss": 0.6019722819328308, + "learning_rate": 2.8175857324856774e-05, + "loss": 0.309, + "step": 10208, + "teacher_loss": 0.2764304280281067 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.45681822299957275, + "learning_rate": 2.8174771644382646e-05, + "loss": 0.3935, + "step": 10209, + "teacher_loss": 0.3864993453025818 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.25299179553985596, + "learning_rate": 2.8173685661850508e-05, + "loss": 0.2075, + "step": 10210, + "teacher_loss": 0.2024236023426056 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.22379331290721893, + "learning_rate": 2.8172599377285252e-05, + "loss": 0.2195, + "step": 10211, + "teacher_loss": 0.21905580163002014 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.3946741819381714, + "learning_rate": 2.8171512790711788e-05, + "loss": 0.2778, + "step": 10212, + "teacher_loss": 0.26478272676467896 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.5840538740158081, + "learning_rate": 2.8170425902155025e-05, + "loss": 0.6142, + "step": 10213, + "teacher_loss": 0.6175504922866821 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.37158724665641785, + "learning_rate": 2.8169338711639886e-05, + "loss": 0.3092, + "step": 10214, + "teacher_loss": 0.3022826910018921 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.5920960307121277, + "learning_rate": 2.816825121919129e-05, + "loss": 0.2899, + "step": 10215, + "teacher_loss": 0.256344735622406 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.23958027362823486, + "learning_rate": 2.8167163424834175e-05, + "loss": 0.2363, + "step": 10216, + "teacher_loss": 0.23591157793998718 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.3541507124900818, + "learning_rate": 2.816607532859348e-05, + "loss": 0.2601, + "step": 10217, + "teacher_loss": 0.24967412650585175 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.2839756906032562, + "learning_rate": 2.8164986930494153e-05, + "loss": 0.3072, + "step": 10218, + "teacher_loss": 0.3097820281982422 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.40960606932640076, + "learning_rate": 2.816389823056114e-05, + "loss": 0.3108, + "step": 10219, + "teacher_loss": 0.29984527826309204 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.37976568937301636, + "learning_rate": 2.8162809228819417e-05, + "loss": 0.2385, + "step": 10220, + "teacher_loss": 0.22282083332538605 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.5235974788665771, + "learning_rate": 2.8161719925293937e-05, + "loss": 0.2447, + "step": 10221, + "teacher_loss": 0.21365785598754883 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.48798346519470215, + "learning_rate": 2.8160630320009683e-05, + "loss": 0.3752, + "step": 10222, + "teacher_loss": 0.3626946210861206 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.3554057776927948, + "learning_rate": 2.815954041299163e-05, + "loss": 0.2251, + "step": 10223, + "teacher_loss": 0.2106718122959137 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.6373621225357056, + "learning_rate": 2.815845020426477e-05, + "loss": 0.3036, + "step": 10224, + "teacher_loss": 0.26646918058395386 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.5369683504104614, + "learning_rate": 2.81573596938541e-05, + "loss": 0.4047, + "step": 10225, + "teacher_loss": 0.3900377154350281 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.6793599128723145, + "learning_rate": 2.815626888178462e-05, + "loss": 0.3922, + "step": 10226, + "teacher_loss": 0.3602656424045563 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.8496882915496826, + "learning_rate": 2.815517776808134e-05, + "loss": 0.476, + "step": 10227, + "teacher_loss": 0.43449491262435913 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.48280957341194153, + "learning_rate": 2.8154086352769274e-05, + "loss": 0.3022, + "step": 10228, + "teacher_loss": 0.28212571144104004 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 1.0122978687286377, + "learning_rate": 2.8152994635873444e-05, + "loss": 0.3084, + "step": 10229, + "teacher_loss": 0.2302239090204239 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.9756971001625061, + "learning_rate": 2.8151902617418886e-05, + "loss": 0.4192, + "step": 10230, + "teacher_loss": 0.35741370916366577 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.5650604963302612, + "learning_rate": 2.8150810297430624e-05, + "loss": 0.2741, + "step": 10231, + "teacher_loss": 0.24173066020011902 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.6005864143371582, + "learning_rate": 2.814971767593372e-05, + "loss": 0.8256, + "step": 10232, + "teacher_loss": 0.8506519794464111 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.4202248454093933, + "learning_rate": 2.814862475295321e-05, + "loss": 0.2318, + "step": 10233, + "teacher_loss": 0.2108844518661499 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 1.152825951576233, + "learning_rate": 2.8147531528514155e-05, + "loss": 0.3656, + "step": 10234, + "teacher_loss": 0.27807706594467163 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.41623347997665405, + "learning_rate": 2.814643800264162e-05, + "loss": 0.2444, + "step": 10235, + "teacher_loss": 0.22528806328773499 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.5408117175102234, + "learning_rate": 2.8145344175360682e-05, + "loss": 0.2905, + "step": 10236, + "teacher_loss": 0.26263225078582764 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.16792520880699158, + "learning_rate": 2.814425004669641e-05, + "loss": 0.2365, + "step": 10237, + "teacher_loss": 0.24406728148460388 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.8898427486419678, + "learning_rate": 2.8143155616673893e-05, + "loss": 0.2851, + "step": 10238, + "teacher_loss": 0.2179526388645172 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 1.5235463380813599, + "learning_rate": 2.8142060885318223e-05, + "loss": 0.4215, + "step": 10239, + "teacher_loss": 0.2990465760231018 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.23764052987098694, + "learning_rate": 2.81409658526545e-05, + "loss": 0.1375, + "step": 10240, + "teacher_loss": 0.12635567784309387 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.25337323546409607, + "learning_rate": 2.8139870518707824e-05, + "loss": 0.2281, + "step": 10241, + "teacher_loss": 0.22531333565711975 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.2043193280696869, + "learning_rate": 2.8138774883503317e-05, + "loss": 0.266, + "step": 10242, + "teacher_loss": 0.2728240489959717 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.14969919621944427, + "learning_rate": 2.8137678947066095e-05, + "loss": 0.1999, + "step": 10243, + "teacher_loss": 0.20552149415016174 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.49693363904953003, + "learning_rate": 2.8136582709421283e-05, + "loss": 0.2759, + "step": 10244, + "teacher_loss": 0.25134241580963135 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.3156214654445648, + "learning_rate": 2.813548617059401e-05, + "loss": 0.2117, + "step": 10245, + "teacher_loss": 0.20010985434055328 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.6361654996871948, + "learning_rate": 2.8134389330609424e-05, + "loss": 0.2892, + "step": 10246, + "teacher_loss": 0.25063055753707886 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.35382696986198425, + "learning_rate": 2.8133292189492673e-05, + "loss": 0.3031, + "step": 10247, + "teacher_loss": 0.297427237033844 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.30127787590026855, + "learning_rate": 2.8132194747268904e-05, + "loss": 0.2269, + "step": 10248, + "teacher_loss": 0.21867021918296814 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.5748451948165894, + "learning_rate": 2.8131097003963285e-05, + "loss": 0.3392, + "step": 10249, + "teacher_loss": 0.31306034326553345 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.2904551327228546, + "learning_rate": 2.812999895960098e-05, + "loss": 0.2205, + "step": 10250, + "teacher_loss": 0.2127016931772232 + }, + { + "epoch": 1.85, + "eval_exact_match": 79.68779564806054, + "eval_f1": 87.05700786746246, + "step": 10250 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.3238072991371155, + "learning_rate": 2.8128900614207162e-05, + "loss": 0.3607, + "step": 10251, + "teacher_loss": 0.36484265327453613 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.7196825742721558, + "learning_rate": 2.8127801967807016e-05, + "loss": 0.2651, + "step": 10252, + "teacher_loss": 0.21462179720401764 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.6263768672943115, + "learning_rate": 2.8126703020425733e-05, + "loss": 0.2882, + "step": 10253, + "teacher_loss": 0.25067460536956787 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.3905763030052185, + "learning_rate": 2.8125603772088504e-05, + "loss": 0.2566, + "step": 10254, + "teacher_loss": 0.2417493760585785 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.3641684651374817, + "learning_rate": 2.812450422282053e-05, + "loss": 0.3182, + "step": 10255, + "teacher_loss": 0.31309932470321655 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.7814779281616211, + "learning_rate": 2.812340437264703e-05, + "loss": 0.4126, + "step": 10256, + "teacher_loss": 0.37165772914886475 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.45672136545181274, + "learning_rate": 2.8122304221593205e-05, + "loss": 0.3321, + "step": 10257, + "teacher_loss": 0.318206787109375 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.4643021523952484, + "learning_rate": 2.8121203769684293e-05, + "loss": 0.2557, + "step": 10258, + "teacher_loss": 0.23249977827072144 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 1.0059096813201904, + "learning_rate": 2.8120103016945518e-05, + "loss": 0.3634, + "step": 10259, + "teacher_loss": 0.29198941588401794 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.46291807293891907, + "learning_rate": 2.8119001963402117e-05, + "loss": 0.2846, + "step": 10260, + "teacher_loss": 0.2648296654224396 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.5385724306106567, + "learning_rate": 2.811790060907933e-05, + "loss": 0.2761, + "step": 10261, + "teacher_loss": 0.2469625622034073 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.42845237255096436, + "learning_rate": 2.8116798954002417e-05, + "loss": 0.2733, + "step": 10262, + "teacher_loss": 0.25606852769851685 + }, + { + "compression_loss": 0.0, + "epoch": 1.85, + "label_loss": 0.8055862188339233, + "learning_rate": 2.8115696998196627e-05, + "loss": 0.3378, + "step": 10263, + "teacher_loss": 0.2858313322067261 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.28058311343193054, + "learning_rate": 2.8114594741687226e-05, + "loss": 0.2118, + "step": 10264, + "teacher_loss": 0.20418499410152435 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.5593300461769104, + "learning_rate": 2.811349218449949e-05, + "loss": 0.287, + "step": 10265, + "teacher_loss": 0.2567441165447235 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.7448901534080505, + "learning_rate": 2.8112389326658695e-05, + "loss": 0.3594, + "step": 10266, + "teacher_loss": 0.3165847361087799 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.6551687717437744, + "learning_rate": 2.811128616819012e-05, + "loss": 0.294, + "step": 10267, + "teacher_loss": 0.25382906198501587 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.45019686222076416, + "learning_rate": 2.811018270911907e-05, + "loss": 0.3458, + "step": 10268, + "teacher_loss": 0.3341796100139618 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.44331735372543335, + "learning_rate": 2.8109078949470833e-05, + "loss": 0.279, + "step": 10269, + "teacher_loss": 0.2607024610042572 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.412151575088501, + "learning_rate": 2.810797488927072e-05, + "loss": 0.289, + "step": 10270, + "teacher_loss": 0.27534863352775574 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.2876285910606384, + "learning_rate": 2.8106870528544044e-05, + "loss": 0.3257, + "step": 10271, + "teacher_loss": 0.3298966884613037 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.5396019220352173, + "learning_rate": 2.8105765867316122e-05, + "loss": 0.37, + "step": 10272, + "teacher_loss": 0.35120469331741333 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.33282193541526794, + "learning_rate": 2.810466090561228e-05, + "loss": 0.2422, + "step": 10273, + "teacher_loss": 0.2321617752313614 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.5569236278533936, + "learning_rate": 2.8103555643457855e-05, + "loss": 0.2606, + "step": 10274, + "teacher_loss": 0.22768956422805786 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.5272619724273682, + "learning_rate": 2.8102450080878183e-05, + "loss": 0.3529, + "step": 10275, + "teacher_loss": 0.33354583382606506 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.32882049679756165, + "learning_rate": 2.8101344217898614e-05, + "loss": 0.2434, + "step": 10276, + "teacher_loss": 0.23395654559135437 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.839247465133667, + "learning_rate": 2.8100238054544507e-05, + "loss": 0.3454, + "step": 10277, + "teacher_loss": 0.29049360752105713 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.24745948612689972, + "learning_rate": 2.8099131590841213e-05, + "loss": 0.2056, + "step": 10278, + "teacher_loss": 0.20092225074768066 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.618982195854187, + "learning_rate": 2.8098024826814108e-05, + "loss": 0.2356, + "step": 10279, + "teacher_loss": 0.19299820065498352 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.2928692102432251, + "learning_rate": 2.8096917762488565e-05, + "loss": 0.1815, + "step": 10280, + "teacher_loss": 0.16912904381752014 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.5782037377357483, + "learning_rate": 2.809581039788996e-05, + "loss": 0.3167, + "step": 10281, + "teacher_loss": 0.2876846492290497 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.4565497636795044, + "learning_rate": 2.8094702733043688e-05, + "loss": 0.299, + "step": 10282, + "teacher_loss": 0.281515508890152 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.34388020634651184, + "learning_rate": 2.8093594767975142e-05, + "loss": 0.2511, + "step": 10283, + "teacher_loss": 0.24075892567634583 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.605820894241333, + "learning_rate": 2.809248650270972e-05, + "loss": 0.2424, + "step": 10284, + "teacher_loss": 0.20201963186264038 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.2903047800064087, + "learning_rate": 2.8091377937272843e-05, + "loss": 0.2534, + "step": 10285, + "teacher_loss": 0.24929940700531006 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.5776563286781311, + "learning_rate": 2.809026907168992e-05, + "loss": 0.2914, + "step": 10286, + "teacher_loss": 0.2596386671066284 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.3714195787906647, + "learning_rate": 2.808915990598637e-05, + "loss": 0.3024, + "step": 10287, + "teacher_loss": 0.29476410150527954 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.8987783193588257, + "learning_rate": 2.8088050440187623e-05, + "loss": 0.4274, + "step": 10288, + "teacher_loss": 0.3749772310256958 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.18082700669765472, + "learning_rate": 2.8086940674319128e-05, + "loss": 0.2016, + "step": 10289, + "teacher_loss": 0.20385286211967468 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.3965546488761902, + "learning_rate": 2.8085830608406314e-05, + "loss": 0.2735, + "step": 10290, + "teacher_loss": 0.2597719430923462 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.2651168704032898, + "learning_rate": 2.808472024247464e-05, + "loss": 0.2225, + "step": 10291, + "teacher_loss": 0.21779808402061462 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.3597174286842346, + "learning_rate": 2.808360957654956e-05, + "loss": 0.2854, + "step": 10292, + "teacher_loss": 0.27714797854423523 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.421950101852417, + "learning_rate": 2.808249861065654e-05, + "loss": 0.2614, + "step": 10293, + "teacher_loss": 0.2435457557439804 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.5364779233932495, + "learning_rate": 2.808138734482105e-05, + "loss": 0.3611, + "step": 10294, + "teacher_loss": 0.34162285923957825 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.7266249656677246, + "learning_rate": 2.8080275779068566e-05, + "loss": 0.2856, + "step": 10295, + "teacher_loss": 0.23655295372009277 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 1.0063552856445312, + "learning_rate": 2.8079163913424578e-05, + "loss": 0.5515, + "step": 10296, + "teacher_loss": 0.5010131597518921 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.6146499514579773, + "learning_rate": 2.8078051747914575e-05, + "loss": 0.3196, + "step": 10297, + "teacher_loss": 0.28685081005096436 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.627341091632843, + "learning_rate": 2.8076939282564054e-05, + "loss": 0.5015, + "step": 10298, + "teacher_loss": 0.4875109791755676 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.17753227055072784, + "learning_rate": 2.8075826517398523e-05, + "loss": 0.2078, + "step": 10299, + "teacher_loss": 0.21119153499603271 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.12642362713813782, + "learning_rate": 2.8074713452443492e-05, + "loss": 0.1963, + "step": 10300, + "teacher_loss": 0.20410653948783875 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.9918332695960999, + "learning_rate": 2.8073600087724483e-05, + "loss": 0.421, + "step": 10301, + "teacher_loss": 0.3575218617916107 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.23232416808605194, + "learning_rate": 2.807248642326702e-05, + "loss": 0.2254, + "step": 10302, + "teacher_loss": 0.22466540336608887 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.6067183017730713, + "learning_rate": 2.807137245909664e-05, + "loss": 0.2871, + "step": 10303, + "teacher_loss": 0.25162273645401 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.8547205924987793, + "learning_rate": 2.807025819523887e-05, + "loss": 0.4478, + "step": 10304, + "teacher_loss": 0.40259578824043274 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.5844119787216187, + "learning_rate": 2.8069143631719276e-05, + "loss": 0.3317, + "step": 10305, + "teacher_loss": 0.3035754859447479 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.6955540776252747, + "learning_rate": 2.8068028768563398e-05, + "loss": 0.2897, + "step": 10306, + "teacher_loss": 0.2446369230747223 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.5403918027877808, + "learning_rate": 2.80669136057968e-05, + "loss": 0.2281, + "step": 10307, + "teacher_loss": 0.19342437386512756 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.40683913230895996, + "learning_rate": 2.806579814344505e-05, + "loss": 0.2769, + "step": 10308, + "teacher_loss": 0.2624482810497284 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.22389420866966248, + "learning_rate": 2.806468238153372e-05, + "loss": 0.1865, + "step": 10309, + "teacher_loss": 0.18238306045532227 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.4098944365978241, + "learning_rate": 2.8063566320088398e-05, + "loss": 0.2401, + "step": 10310, + "teacher_loss": 0.22123856842517853 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.718734085559845, + "learning_rate": 2.8062449959134663e-05, + "loss": 0.2996, + "step": 10311, + "teacher_loss": 0.2530561685562134 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.9787742495536804, + "learning_rate": 2.8061333298698114e-05, + "loss": 0.3759, + "step": 10312, + "teacher_loss": 0.30889958143234253 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.4509371519088745, + "learning_rate": 2.8060216338804353e-05, + "loss": 0.4063, + "step": 10313, + "teacher_loss": 0.4012956917285919 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.3239341378211975, + "learning_rate": 2.805909907947899e-05, + "loss": 0.2405, + "step": 10314, + "teacher_loss": 0.23124219477176666 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.5805948972702026, + "learning_rate": 2.8057981520747632e-05, + "loss": 0.269, + "step": 10315, + "teacher_loss": 0.23441605269908905 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.40246596932411194, + "learning_rate": 2.8056863662635912e-05, + "loss": 0.3997, + "step": 10316, + "teacher_loss": 0.39942896366119385 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.20643341541290283, + "learning_rate": 2.8055745505169457e-05, + "loss": 0.1857, + "step": 10317, + "teacher_loss": 0.18341004848480225 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.7771695256233215, + "learning_rate": 2.80546270483739e-05, + "loss": 0.2919, + "step": 10318, + "teacher_loss": 0.23798736929893494 + }, + { + "compression_loss": 0.0, + "epoch": 1.86, + "label_loss": 0.44528406858444214, + "learning_rate": 2.8053508292274878e-05, + "loss": 0.3219, + "step": 10319, + "teacher_loss": 0.3081396222114563 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.4121386706829071, + "learning_rate": 2.8052389236898055e-05, + "loss": 0.2661, + "step": 10320, + "teacher_loss": 0.24985355138778687 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.38992244005203247, + "learning_rate": 2.8051269882269076e-05, + "loss": 0.2591, + "step": 10321, + "teacher_loss": 0.24451500177383423 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.6654698848724365, + "learning_rate": 2.8050150228413613e-05, + "loss": 0.3569, + "step": 10322, + "teacher_loss": 0.32262516021728516 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.557917594909668, + "learning_rate": 2.8049030275357324e-05, + "loss": 0.2809, + "step": 10323, + "teacher_loss": 0.25014495849609375 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.7684891223907471, + "learning_rate": 2.8047910023125897e-05, + "loss": 0.2665, + "step": 10324, + "teacher_loss": 0.2106896936893463 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.2600611448287964, + "learning_rate": 2.8046789471745012e-05, + "loss": 0.2548, + "step": 10325, + "teacher_loss": 0.25418734550476074 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.4234950542449951, + "learning_rate": 2.8045668621240364e-05, + "loss": 0.3084, + "step": 10326, + "teacher_loss": 0.29563868045806885 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.41755688190460205, + "learning_rate": 2.8044547471637646e-05, + "loss": 0.2866, + "step": 10327, + "teacher_loss": 0.27202218770980835 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.10755197703838348, + "learning_rate": 2.8043426022962563e-05, + "loss": 0.1555, + "step": 10328, + "teacher_loss": 0.16083385050296783 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.11279372125864029, + "learning_rate": 2.8042304275240827e-05, + "loss": 0.2035, + "step": 10329, + "teacher_loss": 0.21358585357666016 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.7404637336730957, + "learning_rate": 2.8041182228498162e-05, + "loss": 0.3086, + "step": 10330, + "teacher_loss": 0.2605902850627899 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.5024974346160889, + "learning_rate": 2.804005988276028e-05, + "loss": 0.2821, + "step": 10331, + "teacher_loss": 0.2576325237751007 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.2890140414237976, + "learning_rate": 2.8038937238052926e-05, + "loss": 0.2613, + "step": 10332, + "teacher_loss": 0.2582029700279236 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.35045111179351807, + "learning_rate": 2.8037814294401835e-05, + "loss": 0.2869, + "step": 10333, + "teacher_loss": 0.27984681725502014 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.4111517071723938, + "learning_rate": 2.803669105183275e-05, + "loss": 0.2395, + "step": 10334, + "teacher_loss": 0.22047272324562073 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.3578011989593506, + "learning_rate": 2.8035567510371425e-05, + "loss": 0.2691, + "step": 10335, + "teacher_loss": 0.25926080346107483 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.3235445022583008, + "learning_rate": 2.803444367004362e-05, + "loss": 0.3337, + "step": 10336, + "teacher_loss": 0.334883451461792 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.7203150987625122, + "learning_rate": 2.80333195308751e-05, + "loss": 0.27, + "step": 10337, + "teacher_loss": 0.2199324667453766 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.8291847705841064, + "learning_rate": 2.803219509289164e-05, + "loss": 0.4049, + "step": 10338, + "teacher_loss": 0.35776910185813904 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.3767836093902588, + "learning_rate": 2.8031070356119015e-05, + "loss": 0.2553, + "step": 10339, + "teacher_loss": 0.2417478859424591 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.32927972078323364, + "learning_rate": 2.8029945320583025e-05, + "loss": 0.2349, + "step": 10340, + "teacher_loss": 0.22436952590942383 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.6173535585403442, + "learning_rate": 2.8028819986309443e-05, + "loss": 0.3463, + "step": 10341, + "teacher_loss": 0.3162160813808441 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.2319202423095703, + "learning_rate": 2.802769435332409e-05, + "loss": 0.1572, + "step": 10342, + "teacher_loss": 0.14884592592716217 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.37412744760513306, + "learning_rate": 2.8026568421652763e-05, + "loss": 0.2343, + "step": 10343, + "teacher_loss": 0.2187473475933075 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.29869046807289124, + "learning_rate": 2.8025442191321276e-05, + "loss": 0.2118, + "step": 10344, + "teacher_loss": 0.20211371779441833 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.34756967425346375, + "learning_rate": 2.8024315662355455e-05, + "loss": 0.2514, + "step": 10345, + "teacher_loss": 0.24073219299316406 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.38378316164016724, + "learning_rate": 2.8023188834781117e-05, + "loss": 0.2688, + "step": 10346, + "teacher_loss": 0.25606077909469604 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.3386090099811554, + "learning_rate": 2.802206170862411e-05, + "loss": 0.2331, + "step": 10347, + "teacher_loss": 0.22138527035713196 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.23073986172676086, + "learning_rate": 2.802093428391027e-05, + "loss": 0.22, + "step": 10348, + "teacher_loss": 0.21878477931022644 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.20492029190063477, + "learning_rate": 2.801980656066545e-05, + "loss": 0.2015, + "step": 10349, + "teacher_loss": 0.20114190876483917 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.8918582797050476, + "learning_rate": 2.8018678538915493e-05, + "loss": 0.315, + "step": 10350, + "teacher_loss": 0.25091320276260376 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.8688955903053284, + "learning_rate": 2.8017550218686273e-05, + "loss": 0.3777, + "step": 10351, + "teacher_loss": 0.3231067359447479 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.4511251449584961, + "learning_rate": 2.8016421600003654e-05, + "loss": 0.2717, + "step": 10352, + "teacher_loss": 0.25173014402389526 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.4114728569984436, + "learning_rate": 2.8015292682893514e-05, + "loss": 0.2326, + "step": 10353, + "teacher_loss": 0.21269632875919342 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.3526153564453125, + "learning_rate": 2.8014163467381734e-05, + "loss": 0.26, + "step": 10354, + "teacher_loss": 0.24965913593769073 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.4737495183944702, + "learning_rate": 2.8013033953494206e-05, + "loss": 0.2281, + "step": 10355, + "teacher_loss": 0.20080244541168213 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.30782222747802734, + "learning_rate": 2.8011904141256825e-05, + "loss": 0.295, + "step": 10356, + "teacher_loss": 0.2935648262500763 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.2902483344078064, + "learning_rate": 2.8010774030695493e-05, + "loss": 0.303, + "step": 10357, + "teacher_loss": 0.30442625284194946 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.383614182472229, + "learning_rate": 2.8009643621836114e-05, + "loss": 0.2197, + "step": 10358, + "teacher_loss": 0.2015208899974823 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.2517194449901581, + "learning_rate": 2.800851291470462e-05, + "loss": 0.2289, + "step": 10359, + "teacher_loss": 0.22635729610919952 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.4676363468170166, + "learning_rate": 2.8007381909326925e-05, + "loss": 0.2179, + "step": 10360, + "teacher_loss": 0.19012172520160675 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.45767152309417725, + "learning_rate": 2.8006250605728954e-05, + "loss": 0.2658, + "step": 10361, + "teacher_loss": 0.2444770336151123 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.6390374302864075, + "learning_rate": 2.800511900393666e-05, + "loss": 0.3325, + "step": 10362, + "teacher_loss": 0.29848289489746094 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.2626778185367584, + "learning_rate": 2.800398710397598e-05, + "loss": 0.1772, + "step": 10363, + "teacher_loss": 0.16768473386764526 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.8169225454330444, + "learning_rate": 2.8002854905872853e-05, + "loss": 0.4098, + "step": 10364, + "teacher_loss": 0.3645349442958832 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.750160276889801, + "learning_rate": 2.8001722409653258e-05, + "loss": 0.6115, + "step": 10365, + "teacher_loss": 0.596047043800354 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.4848783612251282, + "learning_rate": 2.8000589615343145e-05, + "loss": 0.2226, + "step": 10366, + "teacher_loss": 0.19344952702522278 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.15360905230045319, + "learning_rate": 2.799945652296849e-05, + "loss": 0.2319, + "step": 10367, + "teacher_loss": 0.24055379629135132 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.3780812621116638, + "learning_rate": 2.799832313255527e-05, + "loss": 0.2775, + "step": 10368, + "teacher_loss": 0.26631176471710205 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.6879922151565552, + "learning_rate": 2.799718944412947e-05, + "loss": 0.272, + "step": 10369, + "teacher_loss": 0.22575025260448456 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.6711910367012024, + "learning_rate": 2.7996055457717094e-05, + "loss": 0.2644, + "step": 10370, + "teacher_loss": 0.21917986869812012 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.31732892990112305, + "learning_rate": 2.799492117334412e-05, + "loss": 0.2368, + "step": 10371, + "teacher_loss": 0.2278173267841339 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.31505274772644043, + "learning_rate": 2.7993786591036566e-05, + "loss": 0.3732, + "step": 10372, + "teacher_loss": 0.3796786665916443 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.3936498165130615, + "learning_rate": 2.7992651710820444e-05, + "loss": 0.2294, + "step": 10373, + "teacher_loss": 0.21115407347679138 + }, + { + "compression_loss": 0.0, + "epoch": 1.87, + "label_loss": 0.7516721487045288, + "learning_rate": 2.7991516532721777e-05, + "loss": 0.4612, + "step": 10374, + "teacher_loss": 0.42895299196243286 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.3688117265701294, + "learning_rate": 2.7990381056766583e-05, + "loss": 0.2301, + "step": 10375, + "teacher_loss": 0.2147057056427002 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.4552571773529053, + "learning_rate": 2.7989245282980897e-05, + "loss": 0.3089, + "step": 10376, + "teacher_loss": 0.2926892936229706 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.3523431420326233, + "learning_rate": 2.798810921139076e-05, + "loss": 0.2262, + "step": 10377, + "teacher_loss": 0.2121475636959076 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.5056685209274292, + "learning_rate": 2.798697284202222e-05, + "loss": 0.2151, + "step": 10378, + "teacher_loss": 0.1828230321407318 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.5542752742767334, + "learning_rate": 2.798583617490133e-05, + "loss": 0.3108, + "step": 10379, + "teacher_loss": 0.28379887342453003 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.6121727228164673, + "learning_rate": 2.7984699210054153e-05, + "loss": 0.3471, + "step": 10380, + "teacher_loss": 0.31767114996910095 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.3316594660282135, + "learning_rate": 2.7983561947506746e-05, + "loss": 0.3235, + "step": 10381, + "teacher_loss": 0.3225858211517334 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.6516517996788025, + "learning_rate": 2.7982424387285196e-05, + "loss": 0.4255, + "step": 10382, + "teacher_loss": 0.40033668279647827 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.9195019602775574, + "learning_rate": 2.7981286529415576e-05, + "loss": 0.3413, + "step": 10383, + "teacher_loss": 0.2770756483078003 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.5371028184890747, + "learning_rate": 2.7980148373923977e-05, + "loss": 0.3291, + "step": 10384, + "teacher_loss": 0.30602073669433594 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.4159952402114868, + "learning_rate": 2.7979009920836492e-05, + "loss": 0.1905, + "step": 10385, + "teacher_loss": 0.16549193859100342 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.24010604619979858, + "learning_rate": 2.7977871170179225e-05, + "loss": 0.2262, + "step": 10386, + "teacher_loss": 0.2246396243572235 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.3545704185962677, + "learning_rate": 2.7976732121978277e-05, + "loss": 0.2317, + "step": 10387, + "teacher_loss": 0.21804791688919067 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.5020953416824341, + "learning_rate": 2.797559277625977e-05, + "loss": 0.4966, + "step": 10388, + "teacher_loss": 0.49594593048095703 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.8366658687591553, + "learning_rate": 2.7974453133049824e-05, + "loss": 0.5642, + "step": 10389, + "teacher_loss": 0.533894956111908 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.70830237865448, + "learning_rate": 2.7973313192374566e-05, + "loss": 0.3222, + "step": 10390, + "teacher_loss": 0.2792561948299408 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.5858169794082642, + "learning_rate": 2.7972172954260132e-05, + "loss": 0.4356, + "step": 10391, + "teacher_loss": 0.4188770055770874 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.312599241733551, + "learning_rate": 2.797103241873267e-05, + "loss": 0.1699, + "step": 10392, + "teacher_loss": 0.15403355658054352 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.41025346517562866, + "learning_rate": 2.7969891585818317e-05, + "loss": 0.3853, + "step": 10393, + "teacher_loss": 0.3824838697910309 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.21359658241271973, + "learning_rate": 2.796875045554324e-05, + "loss": 0.2845, + "step": 10394, + "teacher_loss": 0.29233911633491516 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 1.2180202007293701, + "learning_rate": 2.7967609027933592e-05, + "loss": 0.3831, + "step": 10395, + "teacher_loss": 0.29035669565200806 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.6336901783943176, + "learning_rate": 2.7966467303015554e-05, + "loss": 0.2795, + "step": 10396, + "teacher_loss": 0.24013368785381317 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.4247021973133087, + "learning_rate": 2.796532528081529e-05, + "loss": 0.2291, + "step": 10397, + "teacher_loss": 0.20732617378234863 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.4351074695587158, + "learning_rate": 2.7964182961358996e-05, + "loss": 0.3186, + "step": 10398, + "teacher_loss": 0.3056785762310028 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.4462539851665497, + "learning_rate": 2.796304034467285e-05, + "loss": 0.2869, + "step": 10399, + "teacher_loss": 0.2691980302333832 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.5241062045097351, + "learning_rate": 2.796189743078306e-05, + "loss": 0.263, + "step": 10400, + "teacher_loss": 0.23397159576416016 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.4067451059818268, + "learning_rate": 2.796075421971582e-05, + "loss": 0.3073, + "step": 10401, + "teacher_loss": 0.29624900221824646 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.28739285469055176, + "learning_rate": 2.7959610711497345e-05, + "loss": 0.311, + "step": 10402, + "teacher_loss": 0.31365156173706055 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.31262698769569397, + "learning_rate": 2.795846690615385e-05, + "loss": 0.2206, + "step": 10403, + "teacher_loss": 0.2103470265865326 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.5453461408615112, + "learning_rate": 2.795732280371156e-05, + "loss": 0.3287, + "step": 10404, + "teacher_loss": 0.30459287762641907 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.42466726899147034, + "learning_rate": 2.7956178404196707e-05, + "loss": 0.305, + "step": 10405, + "teacher_loss": 0.2916773557662964 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.6043449640274048, + "learning_rate": 2.795503370763553e-05, + "loss": 0.3252, + "step": 10406, + "teacher_loss": 0.2942245304584503 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.31097254157066345, + "learning_rate": 2.7953888714054267e-05, + "loss": 0.2655, + "step": 10407, + "teacher_loss": 0.26046860218048096 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.1787864714860916, + "learning_rate": 2.7952743423479176e-05, + "loss": 0.1822, + "step": 10408, + "teacher_loss": 0.1826338768005371 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.32338032126426697, + "learning_rate": 2.7951597835936514e-05, + "loss": 0.2732, + "step": 10409, + "teacher_loss": 0.26763537526130676 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.35308346152305603, + "learning_rate": 2.7950451951452542e-05, + "loss": 0.2312, + "step": 10410, + "teacher_loss": 0.21765124797821045 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.31419676542282104, + "learning_rate": 2.7949305770053536e-05, + "loss": 0.2341, + "step": 10411, + "teacher_loss": 0.22517862915992737 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.8434735536575317, + "learning_rate": 2.7948159291765773e-05, + "loss": 0.3444, + "step": 10412, + "teacher_loss": 0.2889789342880249 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 1.0540509223937988, + "learning_rate": 2.7947012516615533e-05, + "loss": 0.4657, + "step": 10413, + "teacher_loss": 0.40034109354019165 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.7415398955345154, + "learning_rate": 2.794586544462912e-05, + "loss": 0.2701, + "step": 10414, + "teacher_loss": 0.21771132946014404 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.416032075881958, + "learning_rate": 2.7944718075832823e-05, + "loss": 0.2403, + "step": 10415, + "teacher_loss": 0.22082647681236267 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.13640457391738892, + "learning_rate": 2.7943570410252953e-05, + "loss": 0.205, + "step": 10416, + "teacher_loss": 0.21265248954296112 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.3997036814689636, + "learning_rate": 2.7942422447915816e-05, + "loss": 0.2162, + "step": 10417, + "teacher_loss": 0.1958284080028534 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.2261110246181488, + "learning_rate": 2.794127418884774e-05, + "loss": 0.2245, + "step": 10418, + "teacher_loss": 0.22432559728622437 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.7721251249313354, + "learning_rate": 2.7940125633075046e-05, + "loss": 0.2641, + "step": 10419, + "teacher_loss": 0.2077070027589798 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.6084580421447754, + "learning_rate": 2.7938976780624066e-05, + "loss": 0.285, + "step": 10420, + "teacher_loss": 0.24908414483070374 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.5722681283950806, + "learning_rate": 2.7937827631521137e-05, + "loss": 0.2132, + "step": 10421, + "teacher_loss": 0.17327998578548431 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.5443324446678162, + "learning_rate": 2.7936678185792618e-05, + "loss": 0.4083, + "step": 10422, + "teacher_loss": 0.3931610882282257 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.33623749017715454, + "learning_rate": 2.7935528443464852e-05, + "loss": 0.2541, + "step": 10423, + "teacher_loss": 0.24501970410346985 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.6410212516784668, + "learning_rate": 2.7934378404564197e-05, + "loss": 0.5322, + "step": 10424, + "teacher_loss": 0.5200929045677185 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.5043405294418335, + "learning_rate": 2.7933228069117033e-05, + "loss": 0.2761, + "step": 10425, + "teacher_loss": 0.25071409344673157 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.4749714732170105, + "learning_rate": 2.793207743714972e-05, + "loss": 0.288, + "step": 10426, + "teacher_loss": 0.2672499418258667 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.45844417810440063, + "learning_rate": 2.7930926508688647e-05, + "loss": 0.2645, + "step": 10427, + "teacher_loss": 0.2429664134979248 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.6301224231719971, + "learning_rate": 2.7929775283760194e-05, + "loss": 0.3092, + "step": 10428, + "teacher_loss": 0.27352964878082275 + }, + { + "compression_loss": 0.0, + "epoch": 1.88, + "label_loss": 0.6505568623542786, + "learning_rate": 2.792862376239076e-05, + "loss": 0.3993, + "step": 10429, + "teacher_loss": 0.37138664722442627 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.42053818702697754, + "learning_rate": 2.7927471944606746e-05, + "loss": 0.2485, + "step": 10430, + "teacher_loss": 0.2294258177280426 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.6608172655105591, + "learning_rate": 2.792631983043456e-05, + "loss": 0.3771, + "step": 10431, + "teacher_loss": 0.34556469321250916 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.6221284866333008, + "learning_rate": 2.7925167419900613e-05, + "loss": 0.3048, + "step": 10432, + "teacher_loss": 0.2695158123970032 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.8512316942214966, + "learning_rate": 2.792401471303133e-05, + "loss": 0.4372, + "step": 10433, + "teacher_loss": 0.3912391662597656 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.6226814985275269, + "learning_rate": 2.7922861709853136e-05, + "loss": 0.5006, + "step": 10434, + "teacher_loss": 0.4870661497116089 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.3661653995513916, + "learning_rate": 2.792170841039247e-05, + "loss": 0.3152, + "step": 10435, + "teacher_loss": 0.3094923198223114 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 1.2706000804901123, + "learning_rate": 2.7920554814675773e-05, + "loss": 0.4361, + "step": 10436, + "teacher_loss": 0.34343206882476807 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.23582251369953156, + "learning_rate": 2.7919400922729486e-05, + "loss": 0.1961, + "step": 10437, + "teacher_loss": 0.19164887070655823 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.8399091958999634, + "learning_rate": 2.7918246734580076e-05, + "loss": 0.3299, + "step": 10438, + "teacher_loss": 0.27321189641952515 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.15048521757125854, + "learning_rate": 2.7917092250253996e-05, + "loss": 0.1921, + "step": 10439, + "teacher_loss": 0.19667935371398926 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.7713289856910706, + "learning_rate": 2.791593746977772e-05, + "loss": 0.3283, + "step": 10440, + "teacher_loss": 0.2790627181529999 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.429765522480011, + "learning_rate": 2.7914782393177722e-05, + "loss": 0.2605, + "step": 10441, + "teacher_loss": 0.2417023479938507 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.28593653440475464, + "learning_rate": 2.791362702048048e-05, + "loss": 0.1619, + "step": 10442, + "teacher_loss": 0.14812614023685455 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.62589031457901, + "learning_rate": 2.7912471351712493e-05, + "loss": 0.3203, + "step": 10443, + "teacher_loss": 0.2863495647907257 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.6412720084190369, + "learning_rate": 2.791131538690025e-05, + "loss": 0.4152, + "step": 10444, + "teacher_loss": 0.39011329412460327 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.310546875, + "learning_rate": 2.7910159126070257e-05, + "loss": 0.2509, + "step": 10445, + "teacher_loss": 0.2442292422056198 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 1.2569811344146729, + "learning_rate": 2.790900256924902e-05, + "loss": 0.3173, + "step": 10446, + "teacher_loss": 0.21285909414291382 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.32656437158584595, + "learning_rate": 2.7907845716463056e-05, + "loss": 0.2384, + "step": 10447, + "teacher_loss": 0.22856619954109192 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.4360158443450928, + "learning_rate": 2.7906688567738892e-05, + "loss": 0.2192, + "step": 10448, + "teacher_loss": 0.19515354931354523 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.1979067325592041, + "learning_rate": 2.790553112310305e-05, + "loss": 0.2692, + "step": 10449, + "teacher_loss": 0.27710720896720886 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.44713491201400757, + "learning_rate": 2.7904373382582078e-05, + "loss": 0.3155, + "step": 10450, + "teacher_loss": 0.30083832144737244 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.6276953220367432, + "learning_rate": 2.7903215346202513e-05, + "loss": 0.2888, + "step": 10451, + "teacher_loss": 0.2511064112186432 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.5988336205482483, + "learning_rate": 2.79020570139909e-05, + "loss": 0.2958, + "step": 10452, + "teacher_loss": 0.26213592290878296 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.3847898542881012, + "learning_rate": 2.790089838597381e-05, + "loss": 0.2245, + "step": 10453, + "teacher_loss": 0.20670177042484283 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.17812632024288177, + "learning_rate": 2.7899739462177795e-05, + "loss": 0.1741, + "step": 10454, + "teacher_loss": 0.1737053096294403 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.3007696866989136, + "learning_rate": 2.789858024262943e-05, + "loss": 0.2046, + "step": 10455, + "teacher_loss": 0.1939181089401245 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.41024863719940186, + "learning_rate": 2.7897420727355292e-05, + "loss": 0.2368, + "step": 10456, + "teacher_loss": 0.21757131814956665 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.46183979511260986, + "learning_rate": 2.7896260916381967e-05, + "loss": 0.387, + "step": 10457, + "teacher_loss": 0.3786503076553345 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.5082702040672302, + "learning_rate": 2.7895100809736037e-05, + "loss": 0.2701, + "step": 10458, + "teacher_loss": 0.24366092681884766 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.5223250389099121, + "learning_rate": 2.7893940407444115e-05, + "loss": 0.2245, + "step": 10459, + "teacher_loss": 0.19139716029167175 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.4336528182029724, + "learning_rate": 2.78927797095328e-05, + "loss": 0.2128, + "step": 10460, + "teacher_loss": 0.18821988999843597 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.12058216333389282, + "learning_rate": 2.7891618716028692e-05, + "loss": 0.2647, + "step": 10461, + "teacher_loss": 0.2807001769542694 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.503341555595398, + "learning_rate": 2.789045742695842e-05, + "loss": 0.3089, + "step": 10462, + "teacher_loss": 0.28729701042175293 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.2901851534843445, + "learning_rate": 2.788929584234861e-05, + "loss": 0.2896, + "step": 10463, + "teacher_loss": 0.28953447937965393 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.13408219814300537, + "learning_rate": 2.788813396222589e-05, + "loss": 0.1846, + "step": 10464, + "teacher_loss": 0.1901823729276657 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.6426858305931091, + "learning_rate": 2.7886971786616896e-05, + "loss": 0.2987, + "step": 10465, + "teacher_loss": 0.26052331924438477 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.573448657989502, + "learning_rate": 2.788580931554828e-05, + "loss": 0.3034, + "step": 10466, + "teacher_loss": 0.27336758375167847 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.22638225555419922, + "learning_rate": 2.788464654904669e-05, + "loss": 0.2141, + "step": 10467, + "teacher_loss": 0.21275544166564941 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.4824391305446625, + "learning_rate": 2.788348348713878e-05, + "loss": 0.2617, + "step": 10468, + "teacher_loss": 0.23719602823257446 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.6467758417129517, + "learning_rate": 2.7882320129851222e-05, + "loss": 0.4914, + "step": 10469, + "teacher_loss": 0.4741586446762085 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.4561729431152344, + "learning_rate": 2.788115647721069e-05, + "loss": 0.3009, + "step": 10470, + "teacher_loss": 0.28369176387786865 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.32698681950569153, + "learning_rate": 2.7879992529243853e-05, + "loss": 0.2066, + "step": 10471, + "teacher_loss": 0.1932152360677719 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.30247852206230164, + "learning_rate": 2.787882828597741e-05, + "loss": 0.2212, + "step": 10472, + "teacher_loss": 0.2121918797492981 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.6278198957443237, + "learning_rate": 2.7877663747438045e-05, + "loss": 0.2772, + "step": 10473, + "teacher_loss": 0.23820774257183075 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.2564704120159149, + "learning_rate": 2.787649891365246e-05, + "loss": 0.338, + "step": 10474, + "teacher_loss": 0.34708666801452637 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.2450713813304901, + "learning_rate": 2.787533378464736e-05, + "loss": 0.3123, + "step": 10475, + "teacher_loss": 0.31975603103637695 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.3978217840194702, + "learning_rate": 2.7874168360449457e-05, + "loss": 0.304, + "step": 10476, + "teacher_loss": 0.29355770349502563 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.5270134210586548, + "learning_rate": 2.7873002641085476e-05, + "loss": 0.2369, + "step": 10477, + "teacher_loss": 0.20466457307338715 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.5559606552124023, + "learning_rate": 2.7871836626582138e-05, + "loss": 0.3547, + "step": 10478, + "teacher_loss": 0.3322896361351013 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.9896951913833618, + "learning_rate": 2.7870670316966175e-05, + "loss": 0.3305, + "step": 10479, + "teacher_loss": 0.25729668140411377 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.3898169994354248, + "learning_rate": 2.7869503712264333e-05, + "loss": 0.2052, + "step": 10480, + "teacher_loss": 0.1846565455198288 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.30877548456192017, + "learning_rate": 2.7868336812503355e-05, + "loss": 0.1699, + "step": 10481, + "teacher_loss": 0.1544307917356491 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.6854501366615295, + "learning_rate": 2.7867169617709997e-05, + "loss": 0.3411, + "step": 10482, + "teacher_loss": 0.30285215377807617 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.4211253523826599, + "learning_rate": 2.7866002127911017e-05, + "loss": 0.2612, + "step": 10483, + "teacher_loss": 0.24343039095401764 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.7201869487762451, + "learning_rate": 2.786483434313318e-05, + "loss": 0.3114, + "step": 10484, + "teacher_loss": 0.26598456501960754 + }, + { + "compression_loss": 0.0, + "epoch": 1.89, + "label_loss": 0.2890753746032715, + "learning_rate": 2.7863666263403265e-05, + "loss": 0.2693, + "step": 10485, + "teacher_loss": 0.2670706808567047 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.47620460391044617, + "learning_rate": 2.7862497888748047e-05, + "loss": 0.2746, + "step": 10486, + "teacher_loss": 0.25219297409057617 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.5559215545654297, + "learning_rate": 2.7861329219194315e-05, + "loss": 0.4046, + "step": 10487, + "teacher_loss": 0.38774412870407104 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.39769506454467773, + "learning_rate": 2.786016025476887e-05, + "loss": 0.1936, + "step": 10488, + "teacher_loss": 0.1709231436252594 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.33549511432647705, + "learning_rate": 2.7858990995498505e-05, + "loss": 0.2337, + "step": 10489, + "teacher_loss": 0.222381591796875 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.3062931299209595, + "learning_rate": 2.7857821441410028e-05, + "loss": 0.3508, + "step": 10490, + "teacher_loss": 0.35575586557388306 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.22177183628082275, + "learning_rate": 2.7856651592530256e-05, + "loss": 0.1711, + "step": 10491, + "teacher_loss": 0.16542232036590576 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.32008400559425354, + "learning_rate": 2.7855481448886007e-05, + "loss": 0.243, + "step": 10492, + "teacher_loss": 0.23448795080184937 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.5326224565505981, + "learning_rate": 2.7854311010504115e-05, + "loss": 0.3242, + "step": 10493, + "teacher_loss": 0.301089882850647 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.40941372513771057, + "learning_rate": 2.785314027741141e-05, + "loss": 0.2558, + "step": 10494, + "teacher_loss": 0.2386990785598755 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.3746057152748108, + "learning_rate": 2.785196924963473e-05, + "loss": 0.328, + "step": 10495, + "teacher_loss": 0.322842538356781 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.7561874389648438, + "learning_rate": 2.785079792720093e-05, + "loss": 0.3341, + "step": 10496, + "teacher_loss": 0.2872387766838074 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.31002944707870483, + "learning_rate": 2.7849626310136865e-05, + "loss": 0.2158, + "step": 10497, + "teacher_loss": 0.20538225769996643 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.21120533347129822, + "learning_rate": 2.784845439846939e-05, + "loss": 0.2138, + "step": 10498, + "teacher_loss": 0.21411120891571045 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.5520175695419312, + "learning_rate": 2.7847282192225377e-05, + "loss": 0.2445, + "step": 10499, + "teacher_loss": 0.21038463711738586 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.437430739402771, + "learning_rate": 2.7846109691431706e-05, + "loss": 0.2297, + "step": 10500, + "teacher_loss": 0.2066144049167633 + }, + { + "epoch": 1.9, + "eval_exact_match": 79.29990539262063, + "eval_f1": 86.71744553450758, + "step": 10500 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.8003648519515991, + "learning_rate": 2.784493689611525e-05, + "loss": 0.3126, + "step": 10501, + "teacher_loss": 0.2583557963371277 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.8878642916679382, + "learning_rate": 2.7843763806302905e-05, + "loss": 0.3222, + "step": 10502, + "teacher_loss": 0.2593066096305847 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.13957646489143372, + "learning_rate": 2.784259042202156e-05, + "loss": 0.2431, + "step": 10503, + "teacher_loss": 0.25465136766433716 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.7951029539108276, + "learning_rate": 2.7841416743298124e-05, + "loss": 0.3199, + "step": 10504, + "teacher_loss": 0.2671493887901306 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.45762503147125244, + "learning_rate": 2.78402427701595e-05, + "loss": 0.195, + "step": 10505, + "teacher_loss": 0.16583159565925598 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.45093607902526855, + "learning_rate": 2.7839068502632612e-05, + "loss": 0.2746, + "step": 10506, + "teacher_loss": 0.2549722492694855 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.3662753403186798, + "learning_rate": 2.783789394074437e-05, + "loss": 0.2446, + "step": 10507, + "teacher_loss": 0.23113563656806946 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.6336041688919067, + "learning_rate": 2.7836719084521714e-05, + "loss": 0.3977, + "step": 10508, + "teacher_loss": 0.37149134278297424 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.1949794888496399, + "learning_rate": 2.7835543933991575e-05, + "loss": 0.2921, + "step": 10509, + "teacher_loss": 0.30288100242614746 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.32303255796432495, + "learning_rate": 2.7834368489180895e-05, + "loss": 0.3329, + "step": 10510, + "teacher_loss": 0.3340139389038086 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.3003106713294983, + "learning_rate": 2.7833192750116628e-05, + "loss": 0.2947, + "step": 10511, + "teacher_loss": 0.29403382539749146 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.16023391485214233, + "learning_rate": 2.7832016716825722e-05, + "loss": 0.2101, + "step": 10512, + "teacher_loss": 0.21561364829540253 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.2220589518547058, + "learning_rate": 2.7830840389335148e-05, + "loss": 0.2487, + "step": 10513, + "teacher_loss": 0.2516571283340454 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.5362939834594727, + "learning_rate": 2.7829663767671873e-05, + "loss": 0.2833, + "step": 10514, + "teacher_loss": 0.25518855452537537 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.5730119943618774, + "learning_rate": 2.7828486851862873e-05, + "loss": 0.2946, + "step": 10515, + "teacher_loss": 0.2636498808860779 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.5284982919692993, + "learning_rate": 2.7827309641935132e-05, + "loss": 0.2796, + "step": 10516, + "teacher_loss": 0.25189077854156494 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.6837352514266968, + "learning_rate": 2.782613213791564e-05, + "loss": 0.3407, + "step": 10517, + "teacher_loss": 0.3025929629802704 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.8064262866973877, + "learning_rate": 2.782495433983139e-05, + "loss": 0.3103, + "step": 10518, + "teacher_loss": 0.25512173771858215 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.6300204396247864, + "learning_rate": 2.7823776247709392e-05, + "loss": 0.2524, + "step": 10519, + "teacher_loss": 0.21046525239944458 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.2278192937374115, + "learning_rate": 2.7822597861576647e-05, + "loss": 0.1827, + "step": 10520, + "teacher_loss": 0.17764177918434143 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.3098452091217041, + "learning_rate": 2.782141918146018e-05, + "loss": 0.1461, + "step": 10521, + "teacher_loss": 0.1278894990682602 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.26042449474334717, + "learning_rate": 2.7820240207387016e-05, + "loss": 0.2112, + "step": 10522, + "teacher_loss": 0.20569732785224915 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.27227064967155457, + "learning_rate": 2.7819060939384174e-05, + "loss": 0.1871, + "step": 10523, + "teacher_loss": 0.17766734957695007 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.4729669392108917, + "learning_rate": 2.7817881377478703e-05, + "loss": 0.3574, + "step": 10524, + "teacher_loss": 0.34455347061157227 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.428869366645813, + "learning_rate": 2.781670152169764e-05, + "loss": 0.3052, + "step": 10525, + "teacher_loss": 0.2914574146270752 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.9285493493080139, + "learning_rate": 2.7815521372068037e-05, + "loss": 0.4236, + "step": 10526, + "teacher_loss": 0.36745762825012207 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.41628456115722656, + "learning_rate": 2.7814340928616953e-05, + "loss": 0.3712, + "step": 10527, + "teacher_loss": 0.3662249445915222 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.5386245250701904, + "learning_rate": 2.781316019137145e-05, + "loss": 0.2364, + "step": 10528, + "teacher_loss": 0.20283028483390808 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.2850113809108734, + "learning_rate": 2.78119791603586e-05, + "loss": 0.249, + "step": 10529, + "teacher_loss": 0.24501727521419525 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 1.0109288692474365, + "learning_rate": 2.781079783560548e-05, + "loss": 0.4834, + "step": 10530, + "teacher_loss": 0.42484021186828613 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.5316810607910156, + "learning_rate": 2.7809616217139176e-05, + "loss": 0.2477, + "step": 10531, + "teacher_loss": 0.21614117920398712 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.9535702466964722, + "learning_rate": 2.7808434304986775e-05, + "loss": 0.3876, + "step": 10532, + "teacher_loss": 0.3247010409832001 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.33875399827957153, + "learning_rate": 2.7807252099175377e-05, + "loss": 0.2288, + "step": 10533, + "teacher_loss": 0.21658799052238464 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.35775652527809143, + "learning_rate": 2.7806069599732086e-05, + "loss": 0.2503, + "step": 10534, + "teacher_loss": 0.2383839190006256 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.5329631567001343, + "learning_rate": 2.7804886806684015e-05, + "loss": 0.2882, + "step": 10535, + "teacher_loss": 0.26096785068511963 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.9603796005249023, + "learning_rate": 2.780370372005828e-05, + "loss": 0.4191, + "step": 10536, + "teacher_loss": 0.3589169383049011 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.48883798718452454, + "learning_rate": 2.780252033988201e-05, + "loss": 0.3499, + "step": 10537, + "teacher_loss": 0.33448800444602966 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.5582314729690552, + "learning_rate": 2.780133666618233e-05, + "loss": 0.2743, + "step": 10538, + "teacher_loss": 0.24276627600193024 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.6345632672309875, + "learning_rate": 2.7800152698986378e-05, + "loss": 0.2624, + "step": 10539, + "teacher_loss": 0.22107091546058655 + }, + { + "compression_loss": 0.0, + "epoch": 1.9, + "label_loss": 0.3248680830001831, + "learning_rate": 2.7798968438321307e-05, + "loss": 0.2283, + "step": 10540, + "teacher_loss": 0.21760046482086182 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.40187469124794006, + "learning_rate": 2.7797783884214258e-05, + "loss": 0.2637, + "step": 10541, + "teacher_loss": 0.24838611483573914 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.2949375510215759, + "learning_rate": 2.7796599036692398e-05, + "loss": 0.3876, + "step": 10542, + "teacher_loss": 0.3979080319404602 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.4472096264362335, + "learning_rate": 2.7795413895782885e-05, + "loss": 0.2818, + "step": 10543, + "teacher_loss": 0.26337242126464844 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.5137739777565002, + "learning_rate": 2.7794228461512897e-05, + "loss": 0.4909, + "step": 10544, + "teacher_loss": 0.48834359645843506 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.23226553201675415, + "learning_rate": 2.7793042733909608e-05, + "loss": 0.2673, + "step": 10545, + "teacher_loss": 0.2712154984474182 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.9990419149398804, + "learning_rate": 2.7791856713000202e-05, + "loss": 0.4343, + "step": 10546, + "teacher_loss": 0.3715938925743103 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.45345908403396606, + "learning_rate": 2.7790670398811876e-05, + "loss": 0.3069, + "step": 10547, + "teacher_loss": 0.290637731552124 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.2902598977088928, + "learning_rate": 2.778948379137183e-05, + "loss": 0.2535, + "step": 10548, + "teacher_loss": 0.24943237006664276 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.7131874561309814, + "learning_rate": 2.7788296890707255e-05, + "loss": 0.4209, + "step": 10549, + "teacher_loss": 0.38841935992240906 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.28807175159454346, + "learning_rate": 2.7787109696845385e-05, + "loss": 0.2219, + "step": 10550, + "teacher_loss": 0.21449819207191467 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.6555964946746826, + "learning_rate": 2.778592220981342e-05, + "loss": 0.3064, + "step": 10551, + "teacher_loss": 0.26759546995162964 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.35463422536849976, + "learning_rate": 2.7784734429638595e-05, + "loss": 0.2605, + "step": 10552, + "teacher_loss": 0.25004664063453674 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.4276130795478821, + "learning_rate": 2.778354635634814e-05, + "loss": 0.2247, + "step": 10553, + "teacher_loss": 0.20213119685649872 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.5391087532043457, + "learning_rate": 2.7782357989969296e-05, + "loss": 0.2347, + "step": 10554, + "teacher_loss": 0.20087826251983643 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.30259203910827637, + "learning_rate": 2.7781169330529308e-05, + "loss": 0.2255, + "step": 10555, + "teacher_loss": 0.2168789505958557 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.5337007641792297, + "learning_rate": 2.7779980378055423e-05, + "loss": 0.298, + "step": 10556, + "teacher_loss": 0.27177825570106506 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.4964728057384491, + "learning_rate": 2.7778791132574908e-05, + "loss": 0.2218, + "step": 10557, + "teacher_loss": 0.19133460521697998 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.6402260065078735, + "learning_rate": 2.7777601594115024e-05, + "loss": 0.282, + "step": 10558, + "teacher_loss": 0.2422443926334381 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 1.1095409393310547, + "learning_rate": 2.777641176270304e-05, + "loss": 0.3286, + "step": 10559, + "teacher_loss": 0.24179603159427643 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.8538591861724854, + "learning_rate": 2.7775221638366247e-05, + "loss": 0.4287, + "step": 10560, + "teacher_loss": 0.3814762234687805 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.4244820475578308, + "learning_rate": 2.777403122113192e-05, + "loss": 0.2989, + "step": 10561, + "teacher_loss": 0.28497397899627686 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.5642846822738647, + "learning_rate": 2.7772840511027356e-05, + "loss": 0.2683, + "step": 10562, + "teacher_loss": 0.23545873165130615 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.42701101303100586, + "learning_rate": 2.7771649508079853e-05, + "loss": 0.2681, + "step": 10563, + "teacher_loss": 0.2504361867904663 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.5108298063278198, + "learning_rate": 2.7770458212316723e-05, + "loss": 0.4661, + "step": 10564, + "teacher_loss": 0.46109727025032043 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.2883630096912384, + "learning_rate": 2.776926662376527e-05, + "loss": 0.3397, + "step": 10565, + "teacher_loss": 0.34536051750183105 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.32484641671180725, + "learning_rate": 2.7768074742452816e-05, + "loss": 0.2358, + "step": 10566, + "teacher_loss": 0.22593890130519867 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.4530596435070038, + "learning_rate": 2.776688256840669e-05, + "loss": 0.3108, + "step": 10567, + "teacher_loss": 0.29496753215789795 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.47237902879714966, + "learning_rate": 2.776569010165423e-05, + "loss": 0.5336, + "step": 10568, + "teacher_loss": 0.540449857711792 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.3347229063510895, + "learning_rate": 2.7764497342222758e-05, + "loss": 0.5179, + "step": 10569, + "teacher_loss": 0.5383025407791138 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.34465712308883667, + "learning_rate": 2.776330429013964e-05, + "loss": 0.2613, + "step": 10570, + "teacher_loss": 0.25205036997795105 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.5259889364242554, + "learning_rate": 2.7762110945432223e-05, + "loss": 0.267, + "step": 10571, + "teacher_loss": 0.23825687170028687 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.6760830879211426, + "learning_rate": 2.776091730812786e-05, + "loss": 0.33, + "step": 10572, + "teacher_loss": 0.29150718450546265 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.5438052415847778, + "learning_rate": 2.775972337825392e-05, + "loss": 0.2747, + "step": 10573, + "teacher_loss": 0.24476662278175354 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.22737932205200195, + "learning_rate": 2.775852915583778e-05, + "loss": 0.1768, + "step": 10574, + "teacher_loss": 0.17114558815956116 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.4441859722137451, + "learning_rate": 2.7757334640906825e-05, + "loss": 0.4927, + "step": 10575, + "teacher_loss": 0.4981132447719574 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.386456161737442, + "learning_rate": 2.775613983348843e-05, + "loss": 0.2322, + "step": 10576, + "teacher_loss": 0.21507391333580017 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.3415631651878357, + "learning_rate": 2.7754944733609995e-05, + "loss": 0.2262, + "step": 10577, + "teacher_loss": 0.21341730654239655 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.3886233866214752, + "learning_rate": 2.7753749341298915e-05, + "loss": 0.2537, + "step": 10578, + "teacher_loss": 0.23874206840991974 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.6054654121398926, + "learning_rate": 2.7752553656582604e-05, + "loss": 0.2863, + "step": 10579, + "teacher_loss": 0.25084584951400757 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.45179420709609985, + "learning_rate": 2.775135767948847e-05, + "loss": 0.4904, + "step": 10580, + "teacher_loss": 0.49467790126800537 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.5903551578521729, + "learning_rate": 2.775016141004394e-05, + "loss": 0.3082, + "step": 10581, + "teacher_loss": 0.27683669328689575 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.2951371669769287, + "learning_rate": 2.774896484827643e-05, + "loss": 0.2127, + "step": 10582, + "teacher_loss": 0.20355471968650818 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 1.4384177923202515, + "learning_rate": 2.774776799421338e-05, + "loss": 0.6478, + "step": 10583, + "teacher_loss": 0.5599298477172852 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.43202030658721924, + "learning_rate": 2.7746570847882234e-05, + "loss": 0.3099, + "step": 10584, + "teacher_loss": 0.29634588956832886 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.6725959777832031, + "learning_rate": 2.774537340931043e-05, + "loss": 0.2896, + "step": 10585, + "teacher_loss": 0.24701957404613495 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.9345403909683228, + "learning_rate": 2.7744175678525425e-05, + "loss": 0.3582, + "step": 10586, + "teacher_loss": 0.2941203713417053 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.5127602815628052, + "learning_rate": 2.7742977655554684e-05, + "loss": 0.2226, + "step": 10587, + "teacher_loss": 0.19034329056739807 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.2002761960029602, + "learning_rate": 2.774177934042567e-05, + "loss": 0.2174, + "step": 10588, + "teacher_loss": 0.21926885843276978 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 1.3526737689971924, + "learning_rate": 2.774058073316586e-05, + "loss": 0.4596, + "step": 10589, + "teacher_loss": 0.3604092001914978 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.1483670473098755, + "learning_rate": 2.7739381833802725e-05, + "loss": 0.2329, + "step": 10590, + "teacher_loss": 0.24230796098709106 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.795592188835144, + "learning_rate": 2.7738182642363765e-05, + "loss": 0.4822, + "step": 10591, + "teacher_loss": 0.44743263721466064 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.323878675699234, + "learning_rate": 2.7736983158876468e-05, + "loss": 0.2421, + "step": 10592, + "teacher_loss": 0.23298680782318115 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.38348716497421265, + "learning_rate": 2.7735783383368335e-05, + "loss": 0.3056, + "step": 10593, + "teacher_loss": 0.29693886637687683 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.6583057641983032, + "learning_rate": 2.7734583315866874e-05, + "loss": 0.3294, + "step": 10594, + "teacher_loss": 0.29281747341156006 + }, + { + "compression_loss": 0.0, + "epoch": 1.91, + "label_loss": 0.26391372084617615, + "learning_rate": 2.7733382956399594e-05, + "loss": 0.2102, + "step": 10595, + "teacher_loss": 0.20423074066638947 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.657230019569397, + "learning_rate": 2.773218230499402e-05, + "loss": 0.3208, + "step": 10596, + "teacher_loss": 0.2834717333316803 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.4997221827507019, + "learning_rate": 2.773098136167768e-05, + "loss": 0.2627, + "step": 10597, + "teacher_loss": 0.23637765645980835 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.20968911051750183, + "learning_rate": 2.7729780126478108e-05, + "loss": 0.2646, + "step": 10598, + "teacher_loss": 0.270747572183609 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.5338220000267029, + "learning_rate": 2.772857859942284e-05, + "loss": 0.2717, + "step": 10599, + "teacher_loss": 0.24255669116973877 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.9986376762390137, + "learning_rate": 2.7727376780539427e-05, + "loss": 0.3297, + "step": 10600, + "teacher_loss": 0.25542736053466797 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.38344448804855347, + "learning_rate": 2.7726174669855428e-05, + "loss": 0.3788, + "step": 10601, + "teacher_loss": 0.3783177137374878 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.2888924777507782, + "learning_rate": 2.7724972267398398e-05, + "loss": 0.2882, + "step": 10602, + "teacher_loss": 0.2880859375 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.250144898891449, + "learning_rate": 2.7723769573195902e-05, + "loss": 0.2802, + "step": 10603, + "teacher_loss": 0.28351420164108276 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.4528656303882599, + "learning_rate": 2.7722566587275516e-05, + "loss": 0.6465, + "step": 10604, + "teacher_loss": 0.6680537462234497 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.7550205588340759, + "learning_rate": 2.7721363309664824e-05, + "loss": 0.5203, + "step": 10605, + "teacher_loss": 0.49422335624694824 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.2732846736907959, + "learning_rate": 2.772015974039141e-05, + "loss": 0.1851, + "step": 10606, + "teacher_loss": 0.17526564002037048 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.41088151931762695, + "learning_rate": 2.7718955879482878e-05, + "loss": 0.2192, + "step": 10607, + "teacher_loss": 0.19792217016220093 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.4332578778266907, + "learning_rate": 2.7717751726966817e-05, + "loss": 0.2174, + "step": 10608, + "teacher_loss": 0.19336655735969543 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.7957656383514404, + "learning_rate": 2.7716547282870837e-05, + "loss": 0.2302, + "step": 10609, + "teacher_loss": 0.16738399863243103 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.43560680747032166, + "learning_rate": 2.7715342547222556e-05, + "loss": 0.3217, + "step": 10610, + "teacher_loss": 0.3090146780014038 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.2964370846748352, + "learning_rate": 2.7714137520049594e-05, + "loss": 0.2103, + "step": 10611, + "teacher_loss": 0.20075857639312744 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.6856546401977539, + "learning_rate": 2.7712932201379574e-05, + "loss": 0.3053, + "step": 10612, + "teacher_loss": 0.2630848288536072 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.4563559293746948, + "learning_rate": 2.7711726591240133e-05, + "loss": 0.3349, + "step": 10613, + "teacher_loss": 0.32137441635131836 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.4271870255470276, + "learning_rate": 2.7710520689658918e-05, + "loss": 0.2514, + "step": 10614, + "teacher_loss": 0.23182927072048187 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.2995910346508026, + "learning_rate": 2.770931449666357e-05, + "loss": 0.2308, + "step": 10615, + "teacher_loss": 0.22319768369197845 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.8412692546844482, + "learning_rate": 2.7708108012281746e-05, + "loss": 0.251, + "step": 10616, + "teacher_loss": 0.18543805181980133 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.38991835713386536, + "learning_rate": 2.7706901236541103e-05, + "loss": 0.2356, + "step": 10617, + "teacher_loss": 0.21850663423538208 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.21451202034950256, + "learning_rate": 2.7705694169469312e-05, + "loss": 0.1783, + "step": 10618, + "teacher_loss": 0.1742343008518219 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.6568318605422974, + "learning_rate": 2.770448681109405e-05, + "loss": 0.3313, + "step": 10619, + "teacher_loss": 0.2951800227165222 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.6399763822555542, + "learning_rate": 2.7703279161442994e-05, + "loss": 0.2687, + "step": 10620, + "teacher_loss": 0.227428138256073 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.6078928709030151, + "learning_rate": 2.7702071220543833e-05, + "loss": 0.368, + "step": 10621, + "teacher_loss": 0.3413432240486145 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.15240268409252167, + "learning_rate": 2.770086298842426e-05, + "loss": 0.202, + "step": 10622, + "teacher_loss": 0.20754998922348022 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 1.0056992769241333, + "learning_rate": 2.7699654465111984e-05, + "loss": 0.4125, + "step": 10623, + "teacher_loss": 0.34654057025909424 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.4882792830467224, + "learning_rate": 2.7698445650634703e-05, + "loss": 0.3063, + "step": 10624, + "teacher_loss": 0.2860836982727051 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.364740252494812, + "learning_rate": 2.7697236545020133e-05, + "loss": 0.2484, + "step": 10625, + "teacher_loss": 0.23552259802818298 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.2086329311132431, + "learning_rate": 2.7696027148296e-05, + "loss": 0.2303, + "step": 10626, + "teacher_loss": 0.23269705474376678 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.5273315906524658, + "learning_rate": 2.769481746049003e-05, + "loss": 0.3878, + "step": 10627, + "teacher_loss": 0.3722783029079437 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.5774461627006531, + "learning_rate": 2.7693607481629955e-05, + "loss": 0.4907, + "step": 10628, + "teacher_loss": 0.4810274839401245 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.5860852599143982, + "learning_rate": 2.7692397211743517e-05, + "loss": 0.4198, + "step": 10629, + "teacher_loss": 0.40136969089508057 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.5135939717292786, + "learning_rate": 2.7691186650858465e-05, + "loss": 0.3206, + "step": 10630, + "teacher_loss": 0.29913073778152466 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.6275243759155273, + "learning_rate": 2.7689975799002556e-05, + "loss": 0.4623, + "step": 10631, + "teacher_loss": 0.44389382004737854 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.13481444120407104, + "learning_rate": 2.7688764656203546e-05, + "loss": 0.2235, + "step": 10632, + "teacher_loss": 0.23339098691940308 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.26117998361587524, + "learning_rate": 2.7687553222489206e-05, + "loss": 0.1834, + "step": 10633, + "teacher_loss": 0.17475026845932007 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.5900856256484985, + "learning_rate": 2.7686341497887306e-05, + "loss": 0.288, + "step": 10634, + "teacher_loss": 0.2544292211532593 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.18976476788520813, + "learning_rate": 2.7685129482425636e-05, + "loss": 0.2665, + "step": 10635, + "teacher_loss": 0.2750202417373657 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.29199349880218506, + "learning_rate": 2.7683917176131978e-05, + "loss": 0.1813, + "step": 10636, + "teacher_loss": 0.16899548470973969 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.1816972941160202, + "learning_rate": 2.7682704579034128e-05, + "loss": 0.1943, + "step": 10637, + "teacher_loss": 0.19566355645656586 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.27330583333969116, + "learning_rate": 2.7681491691159882e-05, + "loss": 0.2263, + "step": 10638, + "teacher_loss": 0.2211175262928009 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.23129123449325562, + "learning_rate": 2.7680278512537058e-05, + "loss": 0.2263, + "step": 10639, + "teacher_loss": 0.22579103708267212 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.8670210242271423, + "learning_rate": 2.7679065043193464e-05, + "loss": 0.4133, + "step": 10640, + "teacher_loss": 0.36293596029281616 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.4552653729915619, + "learning_rate": 2.767785128315692e-05, + "loss": 0.2333, + "step": 10641, + "teacher_loss": 0.20867277681827545 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.49822381138801575, + "learning_rate": 2.767663723245526e-05, + "loss": 0.2376, + "step": 10642, + "teacher_loss": 0.20864242315292358 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.9551030993461609, + "learning_rate": 2.7675422891116316e-05, + "loss": 0.518, + "step": 10643, + "teacher_loss": 0.46938616037368774 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.5787302255630493, + "learning_rate": 2.767420825916792e-05, + "loss": 0.2861, + "step": 10644, + "teacher_loss": 0.25359150767326355 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.13817007839679718, + "learning_rate": 2.7672993336637936e-05, + "loss": 0.1637, + "step": 10645, + "teacher_loss": 0.16652154922485352 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.7598130702972412, + "learning_rate": 2.7671778123554207e-05, + "loss": 0.2883, + "step": 10646, + "teacher_loss": 0.23592320084571838 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.9243156313896179, + "learning_rate": 2.7670562619944598e-05, + "loss": 0.3948, + "step": 10647, + "teacher_loss": 0.33591240644454956 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.16597387194633484, + "learning_rate": 2.7669346825836973e-05, + "loss": 0.1757, + "step": 10648, + "teacher_loss": 0.17673049867153168 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 1.1268455982208252, + "learning_rate": 2.7668130741259216e-05, + "loss": 0.5314, + "step": 10649, + "teacher_loss": 0.46523338556289673 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.38264861702919006, + "learning_rate": 2.76669143662392e-05, + "loss": 0.1919, + "step": 10650, + "teacher_loss": 0.17071633040905 + }, + { + "compression_loss": 0.0, + "epoch": 1.92, + "label_loss": 0.7397134304046631, + "learning_rate": 2.766569770080481e-05, + "loss": 0.2938, + "step": 10651, + "teacher_loss": 0.24421042203903198 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.32062312960624695, + "learning_rate": 2.7664480744983954e-05, + "loss": 0.2378, + "step": 10652, + "teacher_loss": 0.2285531461238861 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.1934809535741806, + "learning_rate": 2.766326349880452e-05, + "loss": 0.3521, + "step": 10653, + "teacher_loss": 0.36968737840652466 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.7182405591011047, + "learning_rate": 2.766204596229442e-05, + "loss": 0.3669, + "step": 10654, + "teacher_loss": 0.3278920650482178 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.5720716118812561, + "learning_rate": 2.766082813548157e-05, + "loss": 0.2459, + "step": 10655, + "teacher_loss": 0.20968562364578247 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.4730564057826996, + "learning_rate": 2.765961001839389e-05, + "loss": 0.2883, + "step": 10656, + "teacher_loss": 0.2678139805793762 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.7037424445152283, + "learning_rate": 2.7658391611059307e-05, + "loss": 0.2993, + "step": 10657, + "teacher_loss": 0.2543885409832001 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.6151723861694336, + "learning_rate": 2.7657172913505755e-05, + "loss": 0.2249, + "step": 10658, + "teacher_loss": 0.1815710812807083 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.27467262744903564, + "learning_rate": 2.765595392576118e-05, + "loss": 0.1838, + "step": 10659, + "teacher_loss": 0.1737385243177414 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.6059149503707886, + "learning_rate": 2.7654734647853523e-05, + "loss": 0.3491, + "step": 10660, + "teacher_loss": 0.3205581307411194 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.33769217133522034, + "learning_rate": 2.7653515079810744e-05, + "loss": 0.2041, + "step": 10661, + "teacher_loss": 0.18930456042289734 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.42496931552886963, + "learning_rate": 2.7652295221660797e-05, + "loss": 0.2674, + "step": 10662, + "teacher_loss": 0.249863862991333 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.7890260815620422, + "learning_rate": 2.7651075073431656e-05, + "loss": 0.4072, + "step": 10663, + "teacher_loss": 0.3647833466529846 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.4786689281463623, + "learning_rate": 2.7649854635151296e-05, + "loss": 0.2214, + "step": 10664, + "teacher_loss": 0.1928357630968094 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.39120781421661377, + "learning_rate": 2.7648633906847692e-05, + "loss": 0.2, + "step": 10665, + "teacher_loss": 0.1787540316581726 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.49346041679382324, + "learning_rate": 2.764741288854884e-05, + "loss": 0.2902, + "step": 10666, + "teacher_loss": 0.2676636576652527 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.3133053481578827, + "learning_rate": 2.7646191580282724e-05, + "loss": 0.4404, + "step": 10667, + "teacher_loss": 0.454497367143631 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.2956196069717407, + "learning_rate": 2.7644969982077354e-05, + "loss": 0.1966, + "step": 10668, + "teacher_loss": 0.18561431765556335 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.2704859972000122, + "learning_rate": 2.764374809396073e-05, + "loss": 0.1823, + "step": 10669, + "teacher_loss": 0.17248108983039856 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.27046912908554077, + "learning_rate": 2.764252591596087e-05, + "loss": 0.2743, + "step": 10670, + "teacher_loss": 0.27475133538246155 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.38920360803604126, + "learning_rate": 2.76413034481058e-05, + "loss": 0.2646, + "step": 10671, + "teacher_loss": 0.25073009729385376 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.6357411742210388, + "learning_rate": 2.764008069042354e-05, + "loss": 0.3259, + "step": 10672, + "teacher_loss": 0.2915067672729492 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 1.2737022638320923, + "learning_rate": 2.7638857642942127e-05, + "loss": 0.3027, + "step": 10673, + "teacher_loss": 0.19482949376106262 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.5013376474380493, + "learning_rate": 2.76376343056896e-05, + "loss": 0.2423, + "step": 10674, + "teacher_loss": 0.21347010135650635 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.6647286415100098, + "learning_rate": 2.7636410678694008e-05, + "loss": 0.4326, + "step": 10675, + "teacher_loss": 0.40679818391799927 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.21355105936527252, + "learning_rate": 2.7635186761983407e-05, + "loss": 0.2713, + "step": 10676, + "teacher_loss": 0.2776651978492737 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.5686591863632202, + "learning_rate": 2.7633962555585857e-05, + "loss": 0.4565, + "step": 10677, + "teacher_loss": 0.4439891278743744 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.1340579390525818, + "learning_rate": 2.7632738059529423e-05, + "loss": 0.1565, + "step": 10678, + "teacher_loss": 0.1589725911617279 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 1.0708719491958618, + "learning_rate": 2.7631513273842178e-05, + "loss": 0.3542, + "step": 10679, + "teacher_loss": 0.2745916247367859 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.8099985122680664, + "learning_rate": 2.7630288198552206e-05, + "loss": 0.3889, + "step": 10680, + "teacher_loss": 0.34211480617523193 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.3791612386703491, + "learning_rate": 2.7629062833687593e-05, + "loss": 0.3181, + "step": 10681, + "teacher_loss": 0.31130141019821167 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.6782591342926025, + "learning_rate": 2.7627837179276432e-05, + "loss": 0.4896, + "step": 10682, + "teacher_loss": 0.46863383054733276 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.22329875826835632, + "learning_rate": 2.7626611235346825e-05, + "loss": 0.2801, + "step": 10683, + "teacher_loss": 0.2864404320716858 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.21794909238815308, + "learning_rate": 2.7625385001926882e-05, + "loss": 0.1836, + "step": 10684, + "teacher_loss": 0.1798313856124878 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.11426588892936707, + "learning_rate": 2.762415847904471e-05, + "loss": 0.2043, + "step": 10685, + "teacher_loss": 0.21433210372924805 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.18531127274036407, + "learning_rate": 2.762293166672844e-05, + "loss": 0.1852, + "step": 10686, + "teacher_loss": 0.18519604206085205 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.28036683797836304, + "learning_rate": 2.7621704565006186e-05, + "loss": 0.1862, + "step": 10687, + "teacher_loss": 0.17576324939727783 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.403202086687088, + "learning_rate": 2.7620477173906087e-05, + "loss": 0.3646, + "step": 10688, + "teacher_loss": 0.3603616952896118 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.6357541680335999, + "learning_rate": 2.7619249493456288e-05, + "loss": 0.281, + "step": 10689, + "teacher_loss": 0.24153506755828857 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.3377685546875, + "learning_rate": 2.761802152368493e-05, + "loss": 0.5692, + "step": 10690, + "teacher_loss": 0.594910740852356 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.3280877470970154, + "learning_rate": 2.7616793264620174e-05, + "loss": 0.4354, + "step": 10691, + "teacher_loss": 0.447316437959671 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.6290035843849182, + "learning_rate": 2.7615564716290175e-05, + "loss": 0.3872, + "step": 10692, + "teacher_loss": 0.3603074848651886 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.3782510757446289, + "learning_rate": 2.7614335878723096e-05, + "loss": 0.2437, + "step": 10693, + "teacher_loss": 0.22869972884655 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.3793245553970337, + "learning_rate": 2.761310675194712e-05, + "loss": 0.2582, + "step": 10694, + "teacher_loss": 0.24470940232276917 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.5836122632026672, + "learning_rate": 2.7611877335990414e-05, + "loss": 0.2663, + "step": 10695, + "teacher_loss": 0.23105937242507935 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.7853186130523682, + "learning_rate": 2.761064763088118e-05, + "loss": 0.426, + "step": 10696, + "teacher_loss": 0.386123925447464 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.69758540391922, + "learning_rate": 2.7609417636647602e-05, + "loss": 0.2794, + "step": 10697, + "teacher_loss": 0.23296789824962616 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.5496902465820312, + "learning_rate": 2.7608187353317885e-05, + "loss": 0.2693, + "step": 10698, + "teacher_loss": 0.23816293478012085 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.40676063299179077, + "learning_rate": 2.7606956780920228e-05, + "loss": 0.1883, + "step": 10699, + "teacher_loss": 0.1640104055404663 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.5043829679489136, + "learning_rate": 2.7605725919482857e-05, + "loss": 0.4777, + "step": 10700, + "teacher_loss": 0.47476130723953247 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.7403748035430908, + "learning_rate": 2.7604494769033983e-05, + "loss": 0.3141, + "step": 10701, + "teacher_loss": 0.26676928997039795 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.27056220173835754, + "learning_rate": 2.7603263329601834e-05, + "loss": 0.2425, + "step": 10702, + "teacher_loss": 0.23932921886444092 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.4659106135368347, + "learning_rate": 2.7602031601214637e-05, + "loss": 0.2675, + "step": 10703, + "teacher_loss": 0.245440274477005 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.44203001260757446, + "learning_rate": 2.7600799583900643e-05, + "loss": 0.3115, + "step": 10704, + "teacher_loss": 0.2970461845397949 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.9715584516525269, + "learning_rate": 2.7599567277688095e-05, + "loss": 0.5871, + "step": 10705, + "teacher_loss": 0.5443358421325684 + }, + { + "compression_loss": 0.0, + "epoch": 1.93, + "label_loss": 0.47124379873275757, + "learning_rate": 2.759833468260524e-05, + "loss": 0.2301, + "step": 10706, + "teacher_loss": 0.20329716801643372 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.4204053282737732, + "learning_rate": 2.7597101798680353e-05, + "loss": 0.2685, + "step": 10707, + "teacher_loss": 0.25164565443992615 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.3771471083164215, + "learning_rate": 2.759586862594168e-05, + "loss": 0.2421, + "step": 10708, + "teacher_loss": 0.2271193265914917 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.46310678124427795, + "learning_rate": 2.759463516441751e-05, + "loss": 0.2345, + "step": 10709, + "teacher_loss": 0.20906318724155426 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.2895175814628601, + "learning_rate": 2.759340141413611e-05, + "loss": 0.1482, + "step": 10710, + "teacher_loss": 0.1325358748435974 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 1.021508812904358, + "learning_rate": 2.7592167375125772e-05, + "loss": 0.3724, + "step": 10711, + "teacher_loss": 0.300296813249588 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.3177911639213562, + "learning_rate": 2.7590933047414798e-05, + "loss": 0.2528, + "step": 10712, + "teacher_loss": 0.2455936074256897 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.46502798795700073, + "learning_rate": 2.758969843103147e-05, + "loss": 0.3541, + "step": 10713, + "teacher_loss": 0.3418126702308655 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.5373362302780151, + "learning_rate": 2.7588463526004107e-05, + "loss": 0.2617, + "step": 10714, + "teacher_loss": 0.2310972809791565 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.47000786662101746, + "learning_rate": 2.758722833236102e-05, + "loss": 0.29, + "step": 10715, + "teacher_loss": 0.2700064778327942 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.7327920198440552, + "learning_rate": 2.758599285013052e-05, + "loss": 0.3533, + "step": 10716, + "teacher_loss": 0.3111203610897064 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.3775298297405243, + "learning_rate": 2.758475707934094e-05, + "loss": 0.3367, + "step": 10717, + "teacher_loss": 0.3322066068649292 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.4450759291648865, + "learning_rate": 2.7583521020020615e-05, + "loss": 0.3348, + "step": 10718, + "teacher_loss": 0.32251012325286865 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.23235630989074707, + "learning_rate": 2.7582284672197874e-05, + "loss": 0.1771, + "step": 10719, + "teacher_loss": 0.17092543840408325 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.3645136058330536, + "learning_rate": 2.758104803590108e-05, + "loss": 0.23, + "step": 10720, + "teacher_loss": 0.21503706276416779 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.5048770904541016, + "learning_rate": 2.7579811111158563e-05, + "loss": 0.3341, + "step": 10721, + "teacher_loss": 0.3151768445968628 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.3180050849914551, + "learning_rate": 2.75785738979987e-05, + "loss": 0.2164, + "step": 10722, + "teacher_loss": 0.20505811274051666 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.29039496183395386, + "learning_rate": 2.7577336396449844e-05, + "loss": 0.2062, + "step": 10723, + "teacher_loss": 0.19682344794273376 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.3539835214614868, + "learning_rate": 2.757609860654038e-05, + "loss": 0.2739, + "step": 10724, + "teacher_loss": 0.265034556388855 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.8956050872802734, + "learning_rate": 2.7574860528298677e-05, + "loss": 0.4742, + "step": 10725, + "teacher_loss": 0.42738524079322815 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.1452442854642868, + "learning_rate": 2.7573622161753125e-05, + "loss": 0.2387, + "step": 10726, + "teacher_loss": 0.24911123514175415 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.28939560055732727, + "learning_rate": 2.7572383506932113e-05, + "loss": 0.1977, + "step": 10727, + "teacher_loss": 0.18749216198921204 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.6320517659187317, + "learning_rate": 2.757114456386404e-05, + "loss": 0.5318, + "step": 10728, + "teacher_loss": 0.5206484198570251 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.45532482862472534, + "learning_rate": 2.7569905332577314e-05, + "loss": 0.2399, + "step": 10729, + "teacher_loss": 0.2160104513168335 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.18439623713493347, + "learning_rate": 2.7568665813100347e-05, + "loss": 0.2493, + "step": 10730, + "teacher_loss": 0.25647836923599243 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.2852588891983032, + "learning_rate": 2.756742600546155e-05, + "loss": 0.3357, + "step": 10731, + "teacher_loss": 0.3413543701171875 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.38660377264022827, + "learning_rate": 2.756618590968936e-05, + "loss": 0.2268, + "step": 10732, + "teacher_loss": 0.20905235409736633 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.4000321924686432, + "learning_rate": 2.7564945525812203e-05, + "loss": 0.2147, + "step": 10733, + "teacher_loss": 0.194134920835495 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.5205478072166443, + "learning_rate": 2.7563704853858507e-05, + "loss": 0.2899, + "step": 10734, + "teacher_loss": 0.26430654525756836 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.6222261190414429, + "learning_rate": 2.7562463893856737e-05, + "loss": 0.4603, + "step": 10735, + "teacher_loss": 0.44234639406204224 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.35669898986816406, + "learning_rate": 2.756122264583533e-05, + "loss": 0.2462, + "step": 10736, + "teacher_loss": 0.23389463126659393 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.2117297649383545, + "learning_rate": 2.755998110982275e-05, + "loss": 0.2162, + "step": 10737, + "teacher_loss": 0.21667510271072388 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.3946285843849182, + "learning_rate": 2.755873928584746e-05, + "loss": 0.2203, + "step": 10738, + "teacher_loss": 0.20094534754753113 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.33045852184295654, + "learning_rate": 2.7557497173937928e-05, + "loss": 0.1902, + "step": 10739, + "teacher_loss": 0.17463496327400208 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.41973817348480225, + "learning_rate": 2.7556254774122638e-05, + "loss": 0.4123, + "step": 10740, + "teacher_loss": 0.41146600246429443 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.32989501953125, + "learning_rate": 2.7555012086430072e-05, + "loss": 0.234, + "step": 10741, + "teacher_loss": 0.22339129447937012 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.558845043182373, + "learning_rate": 2.755376911088872e-05, + "loss": 0.2379, + "step": 10742, + "teacher_loss": 0.20223954319953918 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 1.1454236507415771, + "learning_rate": 2.755252584752708e-05, + "loss": 0.2829, + "step": 10743, + "teacher_loss": 0.18701830506324768 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.7674243450164795, + "learning_rate": 2.7551282296373656e-05, + "loss": 0.3089, + "step": 10744, + "teacher_loss": 0.2579955458641052 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.5170470476150513, + "learning_rate": 2.755003845745696e-05, + "loss": 0.4112, + "step": 10745, + "teacher_loss": 0.3994525671005249 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.43625807762145996, + "learning_rate": 2.754879433080551e-05, + "loss": 0.2407, + "step": 10746, + "teacher_loss": 0.21898235380649567 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.19233836233615875, + "learning_rate": 2.754754991644783e-05, + "loss": 0.1981, + "step": 10747, + "teacher_loss": 0.19878417253494263 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.8407419919967651, + "learning_rate": 2.754630521441245e-05, + "loss": 0.2603, + "step": 10748, + "teacher_loss": 0.1958077996969223 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.5829198360443115, + "learning_rate": 2.7545060224727902e-05, + "loss": 0.3749, + "step": 10749, + "teacher_loss": 0.35181140899658203 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.2617458999156952, + "learning_rate": 2.7543814947422744e-05, + "loss": 0.363, + "step": 10750, + "teacher_loss": 0.3742242753505707 + }, + { + "epoch": 1.94, + "eval_exact_match": 79.59318826868495, + "eval_f1": 86.78058162372317, + "step": 10750 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.3319438099861145, + "learning_rate": 2.7542569382525508e-05, + "loss": 0.2879, + "step": 10751, + "teacher_loss": 0.2829638421535492 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.6153581738471985, + "learning_rate": 2.7541323530064765e-05, + "loss": 0.2803, + "step": 10752, + "teacher_loss": 0.2430334985256195 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.8033252358436584, + "learning_rate": 2.7540077390069075e-05, + "loss": 0.4027, + "step": 10753, + "teacher_loss": 0.35822615027427673 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.7388111352920532, + "learning_rate": 2.7538830962567008e-05, + "loss": 0.3252, + "step": 10754, + "teacher_loss": 0.2792031168937683 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.5877249240875244, + "learning_rate": 2.753758424758714e-05, + "loss": 0.246, + "step": 10755, + "teacher_loss": 0.20805487036705017 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.43574514985084534, + "learning_rate": 2.753633724515805e-05, + "loss": 0.2431, + "step": 10756, + "teacher_loss": 0.22166277468204498 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.5306398868560791, + "learning_rate": 2.7535089955308342e-05, + "loss": 0.2228, + "step": 10757, + "teacher_loss": 0.18858368694782257 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.3435639441013336, + "learning_rate": 2.75338423780666e-05, + "loss": 0.2605, + "step": 10758, + "teacher_loss": 0.2512151896953583 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.2963387370109558, + "learning_rate": 2.7532594513461427e-05, + "loss": 0.2535, + "step": 10759, + "teacher_loss": 0.24868829548358917 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.5550227165222168, + "learning_rate": 2.7531346361521435e-05, + "loss": 0.6236, + "step": 10760, + "teacher_loss": 0.6311668157577515 + }, + { + "compression_loss": 0.0, + "epoch": 1.94, + "label_loss": 0.40955448150634766, + "learning_rate": 2.7530097922275248e-05, + "loss": 0.5015, + "step": 10761, + "teacher_loss": 0.5117581486701965 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.4119355380535126, + "learning_rate": 2.752884919575148e-05, + "loss": 0.2049, + "step": 10762, + "teacher_loss": 0.18187227845191956 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.4913174510002136, + "learning_rate": 2.752760018197876e-05, + "loss": 0.275, + "step": 10763, + "teacher_loss": 0.2509962022304535 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.6365230083465576, + "learning_rate": 2.7526350880985732e-05, + "loss": 0.4799, + "step": 10764, + "teacher_loss": 0.46245259046554565 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.22546356916427612, + "learning_rate": 2.7525101292801036e-05, + "loss": 0.2152, + "step": 10765, + "teacher_loss": 0.2140844315290451 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.38395965099334717, + "learning_rate": 2.7523851417453322e-05, + "loss": 0.2388, + "step": 10766, + "teacher_loss": 0.22262342274188995 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.4739437997341156, + "learning_rate": 2.7522601254971234e-05, + "loss": 0.2261, + "step": 10767, + "teacher_loss": 0.1985701322555542 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.4453097879886627, + "learning_rate": 2.752135080538345e-05, + "loss": 0.2165, + "step": 10768, + "teacher_loss": 0.1910897195339203 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.8906028866767883, + "learning_rate": 2.7520100068718632e-05, + "loss": 0.5755, + "step": 10769, + "teacher_loss": 0.5404713749885559 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.384732723236084, + "learning_rate": 2.7518849045005458e-05, + "loss": 0.2776, + "step": 10770, + "teacher_loss": 0.2656790018081665 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.3922428488731384, + "learning_rate": 2.7517597734272605e-05, + "loss": 0.3961, + "step": 10771, + "teacher_loss": 0.39647775888442993 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.3590065538883209, + "learning_rate": 2.7516346136548764e-05, + "loss": 0.3662, + "step": 10772, + "teacher_loss": 0.3670479655265808 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.5111308693885803, + "learning_rate": 2.7515094251862635e-05, + "loss": 0.3665, + "step": 10773, + "teacher_loss": 0.35046809911727905 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.6661297082901001, + "learning_rate": 2.751384208024292e-05, + "loss": 0.3596, + "step": 10774, + "teacher_loss": 0.32554882764816284 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.29166215658187866, + "learning_rate": 2.7512589621718326e-05, + "loss": 0.277, + "step": 10775, + "teacher_loss": 0.2753344774246216 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.32293498516082764, + "learning_rate": 2.751133687631756e-05, + "loss": 0.2606, + "step": 10776, + "teacher_loss": 0.2537005543708801 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.25732457637786865, + "learning_rate": 2.751008384406935e-05, + "loss": 0.2186, + "step": 10777, + "teacher_loss": 0.2142890989780426 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.19241492450237274, + "learning_rate": 2.7508830525002434e-05, + "loss": 0.2555, + "step": 10778, + "teacher_loss": 0.26250070333480835 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.69423508644104, + "learning_rate": 2.750757691914553e-05, + "loss": 0.2844, + "step": 10779, + "teacher_loss": 0.2388879358768463 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.19666212797164917, + "learning_rate": 2.750632302652739e-05, + "loss": 0.2915, + "step": 10780, + "teacher_loss": 0.3020234704017639 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.6278557181358337, + "learning_rate": 2.7505068847176754e-05, + "loss": 0.2753, + "step": 10781, + "teacher_loss": 0.2361781746149063 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.49890565872192383, + "learning_rate": 2.7503814381122384e-05, + "loss": 0.2988, + "step": 10782, + "teacher_loss": 0.2765964865684509 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.3144882023334503, + "learning_rate": 2.750255962839304e-05, + "loss": 0.2128, + "step": 10783, + "teacher_loss": 0.20154812932014465 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.2659118175506592, + "learning_rate": 2.7501304589017487e-05, + "loss": 0.541, + "step": 10784, + "teacher_loss": 0.5715261697769165 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.324421226978302, + "learning_rate": 2.75000492630245e-05, + "loss": 0.2009, + "step": 10785, + "teacher_loss": 0.18713781237602234 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.4521762430667877, + "learning_rate": 2.749879365044286e-05, + "loss": 0.2737, + "step": 10786, + "teacher_loss": 0.25387269258499146 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.23208793997764587, + "learning_rate": 2.7497537751301358e-05, + "loss": 0.2852, + "step": 10787, + "teacher_loss": 0.2911258637905121 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.6925598978996277, + "learning_rate": 2.7496281565628783e-05, + "loss": 0.2722, + "step": 10788, + "teacher_loss": 0.22546206414699554 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.69293612241745, + "learning_rate": 2.749502509345394e-05, + "loss": 0.3366, + "step": 10789, + "teacher_loss": 0.29703542590141296 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.15416736900806427, + "learning_rate": 2.7493768334805632e-05, + "loss": 0.1944, + "step": 10790, + "teacher_loss": 0.19892218708992004 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.3359658420085907, + "learning_rate": 2.7492511289712673e-05, + "loss": 0.2324, + "step": 10791, + "teacher_loss": 0.22084154188632965 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.31044816970825195, + "learning_rate": 2.7491253958203884e-05, + "loss": 0.2646, + "step": 10792, + "teacher_loss": 0.25951480865478516 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.14581097662448883, + "learning_rate": 2.7489996340308096e-05, + "loss": 0.2021, + "step": 10793, + "teacher_loss": 0.20840641856193542 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.45781636238098145, + "learning_rate": 2.7488738436054137e-05, + "loss": 0.3512, + "step": 10794, + "teacher_loss": 0.33938688039779663 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.2801690697669983, + "learning_rate": 2.748748024547085e-05, + "loss": 0.2789, + "step": 10795, + "teacher_loss": 0.2787937521934509 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.35359060764312744, + "learning_rate": 2.748622176858708e-05, + "loss": 0.2518, + "step": 10796, + "teacher_loss": 0.24047240614891052 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.30956515669822693, + "learning_rate": 2.7484963005431677e-05, + "loss": 0.2148, + "step": 10797, + "teacher_loss": 0.2042410671710968 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.544880747795105, + "learning_rate": 2.748370395603351e-05, + "loss": 0.3008, + "step": 10798, + "teacher_loss": 0.27367788553237915 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.41636085510253906, + "learning_rate": 2.7482444620421432e-05, + "loss": 0.2289, + "step": 10799, + "teacher_loss": 0.20806975662708282 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.8299413919448853, + "learning_rate": 2.748118499862433e-05, + "loss": 0.2637, + "step": 10800, + "teacher_loss": 0.2007812261581421 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 1.0994682312011719, + "learning_rate": 2.747992509067107e-05, + "loss": 0.5186, + "step": 10801, + "teacher_loss": 0.45409733057022095 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.3366830348968506, + "learning_rate": 2.747866489659055e-05, + "loss": 0.5206, + "step": 10802, + "teacher_loss": 0.5410189032554626 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.32088106870651245, + "learning_rate": 2.747740441641166e-05, + "loss": 0.2562, + "step": 10803, + "teacher_loss": 0.24896365404129028 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.4379093050956726, + "learning_rate": 2.747614365016329e-05, + "loss": 0.3613, + "step": 10804, + "teacher_loss": 0.35281896591186523 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.4277106523513794, + "learning_rate": 2.747488259787435e-05, + "loss": 0.2055, + "step": 10805, + "teacher_loss": 0.18086184561252594 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.406033992767334, + "learning_rate": 2.7473621259573756e-05, + "loss": 0.4108, + "step": 10806, + "teacher_loss": 0.4112781286239624 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.4657803177833557, + "learning_rate": 2.7472359635290427e-05, + "loss": 0.2463, + "step": 10807, + "teacher_loss": 0.22186987102031708 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 1.2364765405654907, + "learning_rate": 2.747109772505328e-05, + "loss": 0.3738, + "step": 10808, + "teacher_loss": 0.27797022461891174 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.14064404368400574, + "learning_rate": 2.7469835528891257e-05, + "loss": 0.1236, + "step": 10809, + "teacher_loss": 0.12167838960886002 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.5955355763435364, + "learning_rate": 2.746857304683329e-05, + "loss": 0.2747, + "step": 10810, + "teacher_loss": 0.23910586535930634 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.43910861015319824, + "learning_rate": 2.7467310278908327e-05, + "loss": 0.3172, + "step": 10811, + "teacher_loss": 0.30368590354919434 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.2320491373538971, + "learning_rate": 2.7466047225145318e-05, + "loss": 0.2447, + "step": 10812, + "teacher_loss": 0.24612921476364136 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 1.0183064937591553, + "learning_rate": 2.746478388557322e-05, + "loss": 0.6473, + "step": 10813, + "teacher_loss": 0.6061270236968994 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.4801907539367676, + "learning_rate": 2.7463520260221e-05, + "loss": 0.3193, + "step": 10814, + "teacher_loss": 0.3014691174030304 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.20801588892936707, + "learning_rate": 2.746225634911763e-05, + "loss": 0.1663, + "step": 10815, + "teacher_loss": 0.16169731318950653 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.7599947452545166, + "learning_rate": 2.7460992152292084e-05, + "loss": 0.2504, + "step": 10816, + "teacher_loss": 0.19380278885364532 + }, + { + "compression_loss": 0.0, + "epoch": 1.95, + "label_loss": 0.3370821475982666, + "learning_rate": 2.7459727669773344e-05, + "loss": 0.2864, + "step": 10817, + "teacher_loss": 0.28074395656585693 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.47463536262512207, + "learning_rate": 2.7458462901590408e-05, + "loss": 0.3831, + "step": 10818, + "teacher_loss": 0.37289243936538696 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.5084630250930786, + "learning_rate": 2.7457197847772272e-05, + "loss": 0.2597, + "step": 10819, + "teacher_loss": 0.23206114768981934 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.33815276622772217, + "learning_rate": 2.7455932508347935e-05, + "loss": 0.2723, + "step": 10820, + "teacher_loss": 0.26503363251686096 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.24131813645362854, + "learning_rate": 2.7454666883346412e-05, + "loss": 0.1955, + "step": 10821, + "teacher_loss": 0.19040238857269287 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.472001850605011, + "learning_rate": 2.7453400972796717e-05, + "loss": 0.212, + "step": 10822, + "teacher_loss": 0.18306519091129303 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.92164146900177, + "learning_rate": 2.7452134776727875e-05, + "loss": 0.4391, + "step": 10823, + "teacher_loss": 0.38551491498947144 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 1.0369915962219238, + "learning_rate": 2.745086829516892e-05, + "loss": 0.4715, + "step": 10824, + "teacher_loss": 0.40871402621269226 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.4346398711204529, + "learning_rate": 2.744960152814888e-05, + "loss": 0.2741, + "step": 10825, + "teacher_loss": 0.2562423646450043 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.4701068103313446, + "learning_rate": 2.7448334475696806e-05, + "loss": 0.2143, + "step": 10826, + "teacher_loss": 0.18583060801029205 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.4358345568180084, + "learning_rate": 2.744706713784174e-05, + "loss": 0.3101, + "step": 10827, + "teacher_loss": 0.2961447834968567 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.19895631074905396, + "learning_rate": 2.7445799514612747e-05, + "loss": 0.2121, + "step": 10828, + "teacher_loss": 0.21353840827941895 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.6724644899368286, + "learning_rate": 2.7444531606038887e-05, + "loss": 0.2638, + "step": 10829, + "teacher_loss": 0.21841639280319214 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.5389783382415771, + "learning_rate": 2.744326341214922e-05, + "loss": 0.3087, + "step": 10830, + "teacher_loss": 0.2831568419933319 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.3820839822292328, + "learning_rate": 2.744199493297284e-05, + "loss": 0.2053, + "step": 10831, + "teacher_loss": 0.1856684386730194 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.5639641284942627, + "learning_rate": 2.744072616853881e-05, + "loss": 0.3486, + "step": 10832, + "teacher_loss": 0.3246185779571533 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.45832860469818115, + "learning_rate": 2.7439457118876235e-05, + "loss": 0.2544, + "step": 10833, + "teacher_loss": 0.23176434636116028 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.24422238767147064, + "learning_rate": 2.7438187784014203e-05, + "loss": 0.3508, + "step": 10834, + "teacher_loss": 0.36267563700675964 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.28793585300445557, + "learning_rate": 2.7436918163981815e-05, + "loss": 0.2053, + "step": 10835, + "teacher_loss": 0.19616399705410004 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.3634748160839081, + "learning_rate": 2.7435648258808176e-05, + "loss": 0.2595, + "step": 10836, + "teacher_loss": 0.24790015816688538 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.2011847198009491, + "learning_rate": 2.7434378068522413e-05, + "loss": 0.2526, + "step": 10837, + "teacher_loss": 0.25832095742225647 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.18684807419776917, + "learning_rate": 2.743310759315364e-05, + "loss": 0.2229, + "step": 10838, + "teacher_loss": 0.22689178586006165 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.6791402697563171, + "learning_rate": 2.7431836832730988e-05, + "loss": 0.2753, + "step": 10839, + "teacher_loss": 0.23045684397220612 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.6846766471862793, + "learning_rate": 2.7430565787283584e-05, + "loss": 0.505, + "step": 10840, + "teacher_loss": 0.4850236475467682 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.16580232977867126, + "learning_rate": 2.742929445684058e-05, + "loss": 0.3293, + "step": 10841, + "teacher_loss": 0.3475082814693451 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.6000460386276245, + "learning_rate": 2.7428022841431118e-05, + "loss": 0.2687, + "step": 10842, + "teacher_loss": 0.2319331169128418 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.39683467149734497, + "learning_rate": 2.7426750941084355e-05, + "loss": 0.2445, + "step": 10843, + "teacher_loss": 0.22752338647842407 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.4009547829627991, + "learning_rate": 2.7425478755829447e-05, + "loss": 0.5134, + "step": 10844, + "teacher_loss": 0.5259145498275757 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.414784699678421, + "learning_rate": 2.742420628569557e-05, + "loss": 0.3684, + "step": 10845, + "teacher_loss": 0.3632541298866272 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.38040831685066223, + "learning_rate": 2.7422933530711883e-05, + "loss": 0.2706, + "step": 10846, + "teacher_loss": 0.25841739773750305 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.56363445520401, + "learning_rate": 2.7421660490907586e-05, + "loss": 0.225, + "step": 10847, + "teacher_loss": 0.18732300400733948 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.4187430143356323, + "learning_rate": 2.742038716631185e-05, + "loss": 0.2192, + "step": 10848, + "teacher_loss": 0.19706645607948303 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.38193169236183167, + "learning_rate": 2.741911355695388e-05, + "loss": 0.3127, + "step": 10849, + "teacher_loss": 0.3050064146518707 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.5706260204315186, + "learning_rate": 2.7417839662862865e-05, + "loss": 0.5728, + "step": 10850, + "teacher_loss": 0.5730546712875366 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.21841847896575928, + "learning_rate": 2.741656548406802e-05, + "loss": 0.2058, + "step": 10851, + "teacher_loss": 0.204440176486969 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.5540941953659058, + "learning_rate": 2.741529102059855e-05, + "loss": 0.2392, + "step": 10852, + "teacher_loss": 0.20417365431785583 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.6483820676803589, + "learning_rate": 2.7414016272483685e-05, + "loss": 0.3773, + "step": 10853, + "teacher_loss": 0.34717458486557007 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.48529040813446045, + "learning_rate": 2.741274123975265e-05, + "loss": 0.3896, + "step": 10854, + "teacher_loss": 0.37894201278686523 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 1.2216346263885498, + "learning_rate": 2.7411465922434666e-05, + "loss": 0.3905, + "step": 10855, + "teacher_loss": 0.298186719417572 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.3121795952320099, + "learning_rate": 2.7410190320558985e-05, + "loss": 0.2768, + "step": 10856, + "teacher_loss": 0.2728690505027771 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.7875667214393616, + "learning_rate": 2.7408914434154844e-05, + "loss": 0.4107, + "step": 10857, + "teacher_loss": 0.3688180148601532 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.4278305172920227, + "learning_rate": 2.7407638263251503e-05, + "loss": 0.3526, + "step": 10858, + "teacher_loss": 0.3442361354827881 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.6039260625839233, + "learning_rate": 2.7406361807878215e-05, + "loss": 0.3028, + "step": 10859, + "teacher_loss": 0.2693212628364563 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.15492716431617737, + "learning_rate": 2.7405085068064246e-05, + "loss": 0.1728, + "step": 10860, + "teacher_loss": 0.17474587261676788 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.7073568105697632, + "learning_rate": 2.7403808043838866e-05, + "loss": 0.3633, + "step": 10861, + "teacher_loss": 0.32507115602493286 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.2879864573478699, + "learning_rate": 2.740253073523136e-05, + "loss": 0.1946, + "step": 10862, + "teacher_loss": 0.18417751789093018 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.6933639645576477, + "learning_rate": 2.7401253142271008e-05, + "loss": 0.2512, + "step": 10863, + "teacher_loss": 0.20202794671058655 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.3929342031478882, + "learning_rate": 2.7399975264987102e-05, + "loss": 0.2759, + "step": 10864, + "teacher_loss": 0.2629064917564392 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.40562641620635986, + "learning_rate": 2.739869710340894e-05, + "loss": 0.2446, + "step": 10865, + "teacher_loss": 0.22672992944717407 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.4464614987373352, + "learning_rate": 2.7397418657565828e-05, + "loss": 0.2636, + "step": 10866, + "teacher_loss": 0.24329717457294464 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.20394085347652435, + "learning_rate": 2.7396139927487074e-05, + "loss": 0.2883, + "step": 10867, + "teacher_loss": 0.2976875901222229 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.19401872158050537, + "learning_rate": 2.7394860913202e-05, + "loss": 0.1663, + "step": 10868, + "teacher_loss": 0.1631706804037094 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.7677251100540161, + "learning_rate": 2.7393581614739924e-05, + "loss": 0.8347, + "step": 10869, + "teacher_loss": 0.8421406745910645 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.37438052892684937, + "learning_rate": 2.739230203213018e-05, + "loss": 0.2658, + "step": 10870, + "teacher_loss": 0.25374093651771545 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.648666262626648, + "learning_rate": 2.73910221654021e-05, + "loss": 0.2555, + "step": 10871, + "teacher_loss": 0.21179074048995972 + }, + { + "compression_loss": 0.0, + "epoch": 1.96, + "label_loss": 0.21512557566165924, + "learning_rate": 2.738974201458504e-05, + "loss": 0.2627, + "step": 10872, + "teacher_loss": 0.2679938077926636 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.7019003033638, + "learning_rate": 2.738846157970834e-05, + "loss": 0.3608, + "step": 10873, + "teacher_loss": 0.3228452205657959 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.6334228515625, + "learning_rate": 2.738718086080136e-05, + "loss": 0.677, + "step": 10874, + "teacher_loss": 0.6818009614944458 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.19164009392261505, + "learning_rate": 2.7385899857893453e-05, + "loss": 0.2295, + "step": 10875, + "teacher_loss": 0.2337537258863449 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.28335070610046387, + "learning_rate": 2.7384618571014005e-05, + "loss": 0.2989, + "step": 10876, + "teacher_loss": 0.30064892768859863 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.9634113311767578, + "learning_rate": 2.7383337000192382e-05, + "loss": 0.2854, + "step": 10877, + "teacher_loss": 0.21010950207710266 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.7090965509414673, + "learning_rate": 2.738205514545797e-05, + "loss": 0.5564, + "step": 10878, + "teacher_loss": 0.5394244194030762 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.38687464594841003, + "learning_rate": 2.7380773006840154e-05, + "loss": 0.4509, + "step": 10879, + "teacher_loss": 0.45806825160980225 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.16957297921180725, + "learning_rate": 2.7379490584368336e-05, + "loss": 0.204, + "step": 10880, + "teacher_loss": 0.20782233774662018 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.43553513288497925, + "learning_rate": 2.737820787807191e-05, + "loss": 0.2311, + "step": 10881, + "teacher_loss": 0.2083931416273117 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.18465974926948547, + "learning_rate": 2.7376924887980293e-05, + "loss": 0.2444, + "step": 10882, + "teacher_loss": 0.25104832649230957 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.2371116578578949, + "learning_rate": 2.7375641614122897e-05, + "loss": 0.2343, + "step": 10883, + "teacher_loss": 0.23395338654518127 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.7803940773010254, + "learning_rate": 2.737435805652914e-05, + "loss": 0.3465, + "step": 10884, + "teacher_loss": 0.2983270287513733 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.5725279450416565, + "learning_rate": 2.7373074215228452e-05, + "loss": 0.2961, + "step": 10885, + "teacher_loss": 0.26542454957962036 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.39034268260002136, + "learning_rate": 2.737179009025027e-05, + "loss": 0.2055, + "step": 10886, + "teacher_loss": 0.18501505255699158 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.29216527938842773, + "learning_rate": 2.7370505681624033e-05, + "loss": 0.2319, + "step": 10887, + "teacher_loss": 0.22524479031562805 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.33569180965423584, + "learning_rate": 2.7369220989379192e-05, + "loss": 0.2201, + "step": 10888, + "teacher_loss": 0.20729121565818787 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 1.540254831314087, + "learning_rate": 2.7367936013545196e-05, + "loss": 0.381, + "step": 10889, + "teacher_loss": 0.2521928548812866 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.2732278108596802, + "learning_rate": 2.736665075415151e-05, + "loss": 0.1675, + "step": 10890, + "teacher_loss": 0.15572570264339447 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 1.0981857776641846, + "learning_rate": 2.73653652112276e-05, + "loss": 0.3514, + "step": 10891, + "teacher_loss": 0.26844727993011475 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.3784533441066742, + "learning_rate": 2.7364079384802935e-05, + "loss": 0.2935, + "step": 10892, + "teacher_loss": 0.2840586304664612 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.3704400956630707, + "learning_rate": 2.736279327490701e-05, + "loss": 0.2128, + "step": 10893, + "teacher_loss": 0.1952662467956543 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.4308822453022003, + "learning_rate": 2.7361506881569288e-05, + "loss": 0.2534, + "step": 10894, + "teacher_loss": 0.23368799686431885 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.4873030483722687, + "learning_rate": 2.7360220204819276e-05, + "loss": 0.3303, + "step": 10895, + "teacher_loss": 0.31285595893859863 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.21434643864631653, + "learning_rate": 2.735893324468648e-05, + "loss": 0.1931, + "step": 10896, + "teacher_loss": 0.19079014658927917 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.5113731622695923, + "learning_rate": 2.7357646001200394e-05, + "loss": 0.2904, + "step": 10897, + "teacher_loss": 0.26588648557662964 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.362072229385376, + "learning_rate": 2.7356358474390536e-05, + "loss": 0.1873, + "step": 10898, + "teacher_loss": 0.16784429550170898 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.2871999442577362, + "learning_rate": 2.735507066428643e-05, + "loss": 0.2118, + "step": 10899, + "teacher_loss": 0.20343969762325287 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.4309789538383484, + "learning_rate": 2.7353782570917587e-05, + "loss": 0.2412, + "step": 10900, + "teacher_loss": 0.22007720172405243 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.2927132844924927, + "learning_rate": 2.7352494194313552e-05, + "loss": 0.2314, + "step": 10901, + "teacher_loss": 0.224545419216156 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.4730851352214813, + "learning_rate": 2.735120553450386e-05, + "loss": 0.2465, + "step": 10902, + "teacher_loss": 0.2213568389415741 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.9335314035415649, + "learning_rate": 2.7349916591518057e-05, + "loss": 0.3854, + "step": 10903, + "teacher_loss": 0.32453060150146484 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.30453547835350037, + "learning_rate": 2.734862736538569e-05, + "loss": 0.1913, + "step": 10904, + "teacher_loss": 0.17870302498340607 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.680169403553009, + "learning_rate": 2.734733785613632e-05, + "loss": 0.2841, + "step": 10905, + "teacher_loss": 0.2401125431060791 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.6479673981666565, + "learning_rate": 2.734604806379952e-05, + "loss": 0.5911, + "step": 10906, + "teacher_loss": 0.584787905216217 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.6036485433578491, + "learning_rate": 2.7344757988404845e-05, + "loss": 0.2931, + "step": 10907, + "teacher_loss": 0.25862234830856323 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.3791617751121521, + "learning_rate": 2.7343467629981886e-05, + "loss": 0.37, + "step": 10908, + "teacher_loss": 0.36900314688682556 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.38305845856666565, + "learning_rate": 2.734217698856022e-05, + "loss": 0.2161, + "step": 10909, + "teacher_loss": 0.19755110144615173 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.8357294797897339, + "learning_rate": 2.734088606416944e-05, + "loss": 0.3518, + "step": 10910, + "teacher_loss": 0.29808545112609863 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.2530587315559387, + "learning_rate": 2.733959485683914e-05, + "loss": 0.2364, + "step": 10911, + "teacher_loss": 0.23452477157115936 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.4362846910953522, + "learning_rate": 2.733830336659893e-05, + "loss": 0.2377, + "step": 10912, + "teacher_loss": 0.21564152836799622 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 1.089313268661499, + "learning_rate": 2.733701159347841e-05, + "loss": 0.2768, + "step": 10913, + "teacher_loss": 0.18656393885612488 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.3766339421272278, + "learning_rate": 2.7335719537507212e-05, + "loss": 0.3003, + "step": 10914, + "teacher_loss": 0.29186469316482544 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.6957297325134277, + "learning_rate": 2.733442719871494e-05, + "loss": 0.4263, + "step": 10915, + "teacher_loss": 0.3963565230369568 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.6060894131660461, + "learning_rate": 2.733313457713124e-05, + "loss": 0.3108, + "step": 10916, + "teacher_loss": 0.2779746651649475 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.488019198179245, + "learning_rate": 2.7331841672785734e-05, + "loss": 0.3714, + "step": 10917, + "teacher_loss": 0.358467161655426 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.739631175994873, + "learning_rate": 2.7330548485708072e-05, + "loss": 0.406, + "step": 10918, + "teacher_loss": 0.36892497539520264 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.35032591223716736, + "learning_rate": 2.732925501592791e-05, + "loss": 0.208, + "step": 10919, + "teacher_loss": 0.19222550094127655 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.8082783222198486, + "learning_rate": 2.7327961263474887e-05, + "loss": 0.371, + "step": 10920, + "teacher_loss": 0.3224300146102905 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.36098629236221313, + "learning_rate": 2.7326667228378677e-05, + "loss": 0.3273, + "step": 10921, + "teacher_loss": 0.32356658577919006 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.5517736077308655, + "learning_rate": 2.7325372910668948e-05, + "loss": 0.3097, + "step": 10922, + "teacher_loss": 0.2827921509742737 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.15011778473854065, + "learning_rate": 2.7324078310375367e-05, + "loss": 0.1954, + "step": 10923, + "teacher_loss": 0.2004351019859314 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.2400776445865631, + "learning_rate": 2.732278342752762e-05, + "loss": 0.2036, + "step": 10924, + "teacher_loss": 0.1995181441307068 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.33505213260650635, + "learning_rate": 2.7321488262155396e-05, + "loss": 0.2269, + "step": 10925, + "teacher_loss": 0.21483266353607178 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.5862535238265991, + "learning_rate": 2.7320192814288386e-05, + "loss": 0.3549, + "step": 10926, + "teacher_loss": 0.3292403221130371 + }, + { + "compression_loss": 0.0, + "epoch": 1.97, + "label_loss": 0.3221989870071411, + "learning_rate": 2.7318897083956295e-05, + "loss": 0.2413, + "step": 10927, + "teacher_loss": 0.23228782415390015 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.28338491916656494, + "learning_rate": 2.7317601071188823e-05, + "loss": 0.1995, + "step": 10928, + "teacher_loss": 0.19020111858844757 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 1.0176000595092773, + "learning_rate": 2.7316304776015695e-05, + "loss": 0.4314, + "step": 10929, + "teacher_loss": 0.36625435948371887 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.48951399326324463, + "learning_rate": 2.7315008198466623e-05, + "loss": 0.3323, + "step": 10930, + "teacher_loss": 0.31481537222862244 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.684956967830658, + "learning_rate": 2.7313711338571333e-05, + "loss": 0.327, + "step": 10931, + "teacher_loss": 0.2872406244277954 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.403883159160614, + "learning_rate": 2.7312414196359562e-05, + "loss": 0.2726, + "step": 10932, + "teacher_loss": 0.2580595016479492 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.5118328332901001, + "learning_rate": 2.7311116771861044e-05, + "loss": 0.2477, + "step": 10933, + "teacher_loss": 0.2183288037776947 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.14330704510211945, + "learning_rate": 2.7309819065105537e-05, + "loss": 0.2447, + "step": 10934, + "teacher_loss": 0.25598254799842834 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.30741801857948303, + "learning_rate": 2.7308521076122782e-05, + "loss": 0.2859, + "step": 10935, + "teacher_loss": 0.28353387117385864 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.5244690775871277, + "learning_rate": 2.730722280494254e-05, + "loss": 0.2406, + "step": 10936, + "teacher_loss": 0.2090718150138855 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.5133647322654724, + "learning_rate": 2.7305924251594577e-05, + "loss": 0.2748, + "step": 10937, + "teacher_loss": 0.24834682047367096 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.238912433385849, + "learning_rate": 2.730462541610867e-05, + "loss": 0.1776, + "step": 10938, + "teacher_loss": 0.17078456282615662 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.4113699793815613, + "learning_rate": 2.7303326298514588e-05, + "loss": 0.2609, + "step": 10939, + "teacher_loss": 0.24417494237422943 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.3746951222419739, + "learning_rate": 2.7302026898842126e-05, + "loss": 0.2841, + "step": 10940, + "teacher_loss": 0.2740182876586914 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.7371665239334106, + "learning_rate": 2.7300727217121068e-05, + "loss": 0.761, + "step": 10941, + "teacher_loss": 0.7636754512786865 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.7845966815948486, + "learning_rate": 2.729942725338122e-05, + "loss": 0.3151, + "step": 10942, + "teacher_loss": 0.262956440448761 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.7906073331832886, + "learning_rate": 2.7298127007652373e-05, + "loss": 0.331, + "step": 10943, + "teacher_loss": 0.2799391746520996 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.4016439914703369, + "learning_rate": 2.729682647996435e-05, + "loss": 0.2364, + "step": 10944, + "teacher_loss": 0.21809351444244385 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.29760733246803284, + "learning_rate": 2.729552567034696e-05, + "loss": 0.2327, + "step": 10945, + "teacher_loss": 0.22552502155303955 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.45928603410720825, + "learning_rate": 2.729422457883003e-05, + "loss": 0.2799, + "step": 10946, + "teacher_loss": 0.26000452041625977 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.4973798394203186, + "learning_rate": 2.72929232054434e-05, + "loss": 0.2796, + "step": 10947, + "teacher_loss": 0.2553505599498749 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.25807061791419983, + "learning_rate": 2.7291621550216887e-05, + "loss": 0.2343, + "step": 10948, + "teacher_loss": 0.23163972795009613 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.7279292345046997, + "learning_rate": 2.7290319613180348e-05, + "loss": 0.8084, + "step": 10949, + "teacher_loss": 0.8173166513442993 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.4489055573940277, + "learning_rate": 2.7289017394363625e-05, + "loss": 0.307, + "step": 10950, + "teacher_loss": 0.2911779582500458 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.3300057351589203, + "learning_rate": 2.728771489379658e-05, + "loss": 0.2444, + "step": 10951, + "teacher_loss": 0.23492828011512756 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.2552585005760193, + "learning_rate": 2.7286412111509075e-05, + "loss": 0.2475, + "step": 10952, + "teacher_loss": 0.2466193586587906 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.290688157081604, + "learning_rate": 2.7285109047530975e-05, + "loss": 0.2411, + "step": 10953, + "teacher_loss": 0.23561015725135803 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.3721437454223633, + "learning_rate": 2.7283805701892156e-05, + "loss": 0.2463, + "step": 10954, + "teacher_loss": 0.2323523610830307 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.2926291823387146, + "learning_rate": 2.7282502074622505e-05, + "loss": 0.2198, + "step": 10955, + "teacher_loss": 0.2116565853357315 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.48973768949508667, + "learning_rate": 2.72811981657519e-05, + "loss": 0.2423, + "step": 10956, + "teacher_loss": 0.21477633714675903 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.6451169848442078, + "learning_rate": 2.7279893975310246e-05, + "loss": 0.2776, + "step": 10957, + "teacher_loss": 0.2367827147245407 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.7494860887527466, + "learning_rate": 2.727858950332744e-05, + "loss": 0.4126, + "step": 10958, + "teacher_loss": 0.37520962953567505 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.26051971316337585, + "learning_rate": 2.727728474983339e-05, + "loss": 0.203, + "step": 10959, + "teacher_loss": 0.19658610224723816 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.4420437216758728, + "learning_rate": 2.727597971485801e-05, + "loss": 0.262, + "step": 10960, + "teacher_loss": 0.24203094840049744 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.6790921092033386, + "learning_rate": 2.727467439843122e-05, + "loss": 0.529, + "step": 10961, + "teacher_loss": 0.5122976303100586 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.6930454969406128, + "learning_rate": 2.7273368800582946e-05, + "loss": 0.3163, + "step": 10962, + "teacher_loss": 0.27444779872894287 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.2740445137023926, + "learning_rate": 2.7272062921343123e-05, + "loss": 0.4054, + "step": 10963, + "teacher_loss": 0.4199417233467102 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.4202827215194702, + "learning_rate": 2.7270756760741692e-05, + "loss": 0.2879, + "step": 10964, + "teacher_loss": 0.2732202410697937 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.7727272510528564, + "learning_rate": 2.72694503188086e-05, + "loss": 0.2671, + "step": 10965, + "teacher_loss": 0.21095682680606842 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.8302296996116638, + "learning_rate": 2.7268143595573793e-05, + "loss": 0.3053, + "step": 10966, + "teacher_loss": 0.2469996064901352 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.6615298390388489, + "learning_rate": 2.7266836591067237e-05, + "loss": 0.2609, + "step": 10967, + "teacher_loss": 0.2163485586643219 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 1.0277924537658691, + "learning_rate": 2.72655293053189e-05, + "loss": 0.368, + "step": 10968, + "teacher_loss": 0.29464367032051086 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.32207390666007996, + "learning_rate": 2.7264221738358742e-05, + "loss": 0.1983, + "step": 10969, + "teacher_loss": 0.18452855944633484 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.41837024688720703, + "learning_rate": 2.726291389021676e-05, + "loss": 0.3084, + "step": 10970, + "teacher_loss": 0.29618021845817566 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.5088686347007751, + "learning_rate": 2.7261605760922918e-05, + "loss": 0.2694, + "step": 10971, + "teacher_loss": 0.24279162287712097 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.30409446358680725, + "learning_rate": 2.7260297350507227e-05, + "loss": 0.2762, + "step": 10972, + "teacher_loss": 0.27307072281837463 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.47880637645721436, + "learning_rate": 2.725898865899967e-05, + "loss": 0.2337, + "step": 10973, + "teacher_loss": 0.2064475417137146 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.1640702486038208, + "learning_rate": 2.725767968643026e-05, + "loss": 0.2275, + "step": 10974, + "teacher_loss": 0.23455876111984253 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.5323233008384705, + "learning_rate": 2.7256370432829008e-05, + "loss": 0.3299, + "step": 10975, + "teacher_loss": 0.30745208263397217 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.5228419303894043, + "learning_rate": 2.7255060898225924e-05, + "loss": 0.3088, + "step": 10976, + "teacher_loss": 0.2850039005279541 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.30222204327583313, + "learning_rate": 2.7253751082651038e-05, + "loss": 0.2152, + "step": 10977, + "teacher_loss": 0.20551280677318573 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.46103575825691223, + "learning_rate": 2.7252440986134375e-05, + "loss": 0.2674, + "step": 10978, + "teacher_loss": 0.2458699345588684 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.904411792755127, + "learning_rate": 2.7251130608705976e-05, + "loss": 0.3183, + "step": 10979, + "teacher_loss": 0.2531786561012268 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.43219244480133057, + "learning_rate": 2.7249819950395886e-05, + "loss": 0.2839, + "step": 10980, + "teacher_loss": 0.26740720868110657 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.48594194650650024, + "learning_rate": 2.7248509011234154e-05, + "loss": 0.2162, + "step": 10981, + "teacher_loss": 0.1862303465604782 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.5689194202423096, + "learning_rate": 2.7247197791250828e-05, + "loss": 0.2238, + "step": 10982, + "teacher_loss": 0.185488760471344 + }, + { + "compression_loss": 0.0, + "epoch": 1.98, + "label_loss": 0.4455610513687134, + "learning_rate": 2.7245886290475974e-05, + "loss": 0.2662, + "step": 10983, + "teacher_loss": 0.24628344178199768 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.3862900137901306, + "learning_rate": 2.7244574508939667e-05, + "loss": 0.2248, + "step": 10984, + "teacher_loss": 0.20683839917182922 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.34636855125427246, + "learning_rate": 2.7243262446671976e-05, + "loss": 0.2222, + "step": 10985, + "teacher_loss": 0.20837363600730896 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.45351868867874146, + "learning_rate": 2.7241950103702983e-05, + "loss": 0.2977, + "step": 10986, + "teacher_loss": 0.28036606311798096 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.3287506699562073, + "learning_rate": 2.7240637480062783e-05, + "loss": 0.1607, + "step": 10987, + "teacher_loss": 0.1419958770275116 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.28295910358428955, + "learning_rate": 2.723932457578146e-05, + "loss": 0.1632, + "step": 10988, + "teacher_loss": 0.149948388338089 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.3634149730205536, + "learning_rate": 2.7238011390889116e-05, + "loss": 0.3118, + "step": 10989, + "teacher_loss": 0.30611008405685425 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.49249735474586487, + "learning_rate": 2.723669792541587e-05, + "loss": 0.2111, + "step": 10990, + "teacher_loss": 0.17982915043830872 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.7725654244422913, + "learning_rate": 2.7235384179391824e-05, + "loss": 0.3167, + "step": 10991, + "teacher_loss": 0.2660036087036133 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.32238221168518066, + "learning_rate": 2.7234070152847104e-05, + "loss": 0.2291, + "step": 10992, + "teacher_loss": 0.21874716877937317 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.40911489725112915, + "learning_rate": 2.7232755845811832e-05, + "loss": 0.2856, + "step": 10993, + "teacher_loss": 0.27186620235443115 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.42562103271484375, + "learning_rate": 2.7231441258316145e-05, + "loss": 0.3217, + "step": 10994, + "teacher_loss": 0.310102641582489 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.24604177474975586, + "learning_rate": 2.7230126390390187e-05, + "loss": 0.2381, + "step": 10995, + "teacher_loss": 0.23723720014095306 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.1614038050174713, + "learning_rate": 2.7228811242064092e-05, + "loss": 0.2019, + "step": 10996, + "teacher_loss": 0.20636524260044098 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.22733375430107117, + "learning_rate": 2.7227495813368022e-05, + "loss": 0.229, + "step": 10997, + "teacher_loss": 0.22922906279563904 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.5421076416969299, + "learning_rate": 2.7226180104332134e-05, + "loss": 0.2649, + "step": 10998, + "teacher_loss": 0.23409873247146606 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.6794562339782715, + "learning_rate": 2.7224864114986592e-05, + "loss": 0.3547, + "step": 10999, + "teacher_loss": 0.3186277449131012 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.5141890645027161, + "learning_rate": 2.7223547845361565e-05, + "loss": 0.2873, + "step": 11000, + "teacher_loss": 0.2620975077152252 + }, + { + "epoch": 1.99, + "eval_exact_match": 79.30936613055819, + "eval_f1": 86.80744384812628, + "step": 11000 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.5698481798171997, + "learning_rate": 2.7222231295487237e-05, + "loss": 0.264, + "step": 11001, + "teacher_loss": 0.23004421591758728 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.2744436264038086, + "learning_rate": 2.722091446539379e-05, + "loss": 0.1968, + "step": 11002, + "teacher_loss": 0.1881275624036789 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.2508719861507416, + "learning_rate": 2.721959735511141e-05, + "loss": 0.2431, + "step": 11003, + "teacher_loss": 0.24219286441802979 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.6528825759887695, + "learning_rate": 2.7218279964670302e-05, + "loss": 0.3563, + "step": 11004, + "teacher_loss": 0.3233593702316284 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.32213282585144043, + "learning_rate": 2.7216962294100668e-05, + "loss": 0.2341, + "step": 11005, + "teacher_loss": 0.2243487536907196 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.4476689100265503, + "learning_rate": 2.7215644343432718e-05, + "loss": 0.2406, + "step": 11006, + "teacher_loss": 0.21757693588733673 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.20841731131076813, + "learning_rate": 2.7214326112696662e-05, + "loss": 0.2285, + "step": 11007, + "teacher_loss": 0.23071825504302979 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.2793912887573242, + "learning_rate": 2.721300760192273e-05, + "loss": 0.2365, + "step": 11008, + "teacher_loss": 0.23173384368419647 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.7197442054748535, + "learning_rate": 2.7211688811141152e-05, + "loss": 0.3045, + "step": 11009, + "teacher_loss": 0.25840288400650024 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.6929067373275757, + "learning_rate": 2.7210369740382166e-05, + "loss": 0.3047, + "step": 11010, + "teacher_loss": 0.2615973949432373 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.36473339796066284, + "learning_rate": 2.7209050389676006e-05, + "loss": 0.1835, + "step": 11011, + "teacher_loss": 0.16336104273796082 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.5672443509101868, + "learning_rate": 2.7207730759052925e-05, + "loss": 0.3647, + "step": 11012, + "teacher_loss": 0.34220004081726074 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.5090664625167847, + "learning_rate": 2.720641084854318e-05, + "loss": 0.2553, + "step": 11013, + "teacher_loss": 0.227097749710083 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.6856584548950195, + "learning_rate": 2.720509065817703e-05, + "loss": 0.3141, + "step": 11014, + "teacher_loss": 0.2728331685066223 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.32951289415359497, + "learning_rate": 2.7203770187984746e-05, + "loss": 0.222, + "step": 11015, + "teacher_loss": 0.21004731953144073 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.3552158772945404, + "learning_rate": 2.7202449437996596e-05, + "loss": 0.234, + "step": 11016, + "teacher_loss": 0.2205621302127838 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.9253280162811279, + "learning_rate": 2.7201128408242866e-05, + "loss": 0.3404, + "step": 11017, + "teacher_loss": 0.275382399559021 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.9888057708740234, + "learning_rate": 2.7199807098753846e-05, + "loss": 0.3642, + "step": 11018, + "teacher_loss": 0.2947728633880615 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.5136024355888367, + "learning_rate": 2.7198485509559825e-05, + "loss": 0.1952, + "step": 11019, + "teacher_loss": 0.15978918969631195 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.3180859684944153, + "learning_rate": 2.71971636406911e-05, + "loss": 0.2075, + "step": 11020, + "teacher_loss": 0.19520969688892365 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.44778770208358765, + "learning_rate": 2.7195841492177988e-05, + "loss": 0.3122, + "step": 11021, + "teacher_loss": 0.29713016748428345 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 1.3251254558563232, + "learning_rate": 2.7194519064050792e-05, + "loss": 0.4342, + "step": 11022, + "teacher_loss": 0.3351704776287079 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.5996873378753662, + "learning_rate": 2.7193196356339837e-05, + "loss": 0.2861, + "step": 11023, + "teacher_loss": 0.25128084421157837 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.4814158082008362, + "learning_rate": 2.7191873369075443e-05, + "loss": 0.414, + "step": 11024, + "teacher_loss": 0.406563401222229 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.42363858222961426, + "learning_rate": 2.7190550102287953e-05, + "loss": 0.3263, + "step": 11025, + "teacher_loss": 0.31544697284698486 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.6484353542327881, + "learning_rate": 2.7189226556007692e-05, + "loss": 0.3682, + "step": 11026, + "teacher_loss": 0.33706796169281006 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.4444020688533783, + "learning_rate": 2.718790273026501e-05, + "loss": 0.3315, + "step": 11027, + "teacher_loss": 0.31896984577178955 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.41422030329704285, + "learning_rate": 2.7186578625090266e-05, + "loss": 0.3275, + "step": 11028, + "teacher_loss": 0.31787851452827454 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.4196636974811554, + "learning_rate": 2.7185254240513806e-05, + "loss": 0.3374, + "step": 11029, + "teacher_loss": 0.3282148838043213 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.5356686115264893, + "learning_rate": 2.7183929576566e-05, + "loss": 0.2062, + "step": 11030, + "teacher_loss": 0.16959501802921295 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.40177783370018005, + "learning_rate": 2.718260463327722e-05, + "loss": 0.3218, + "step": 11031, + "teacher_loss": 0.3129402697086334 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.32416480779647827, + "learning_rate": 2.7181279410677842e-05, + "loss": 0.1872, + "step": 11032, + "teacher_loss": 0.17201584577560425 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.47777456045150757, + "learning_rate": 2.7179953908798246e-05, + "loss": 0.2928, + "step": 11033, + "teacher_loss": 0.2722725570201874 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.27770549058914185, + "learning_rate": 2.717862812766882e-05, + "loss": 0.2192, + "step": 11034, + "teacher_loss": 0.21273157000541687 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.583075225353241, + "learning_rate": 2.7177302067319977e-05, + "loss": 0.4403, + "step": 11035, + "teacher_loss": 0.4244568943977356 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.7338087558746338, + "learning_rate": 2.71759757277821e-05, + "loss": 0.2622, + "step": 11036, + "teacher_loss": 0.20981313288211823 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.5577709674835205, + "learning_rate": 2.7174649109085605e-05, + "loss": 0.4409, + "step": 11037, + "teacher_loss": 0.42786386609077454 + }, + { + "compression_loss": 0.0, + "epoch": 1.99, + "label_loss": 0.1353207528591156, + "learning_rate": 2.7173322211260906e-05, + "loss": 0.1885, + "step": 11038, + "teacher_loss": 0.19436055421829224 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.24597877264022827, + "learning_rate": 2.7171995034338427e-05, + "loss": 0.2351, + "step": 11039, + "teacher_loss": 0.23393619060516357 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.3176301419734955, + "learning_rate": 2.7170667578348598e-05, + "loss": 0.2409, + "step": 11040, + "teacher_loss": 0.23238921165466309 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.6035487651824951, + "learning_rate": 2.7169339843321846e-05, + "loss": 0.3282, + "step": 11041, + "teacher_loss": 0.29760265350341797 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.3541339933872223, + "learning_rate": 2.716801182928862e-05, + "loss": 0.2158, + "step": 11042, + "teacher_loss": 0.20044703781604767 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.42789238691329956, + "learning_rate": 2.7166683536279363e-05, + "loss": 0.3725, + "step": 11043, + "teacher_loss": 0.3663749098777771 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.3658503592014313, + "learning_rate": 2.7165354964324534e-05, + "loss": 0.3898, + "step": 11044, + "teacher_loss": 0.3924328088760376 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.7615195512771606, + "learning_rate": 2.7164026113454585e-05, + "loss": 0.3247, + "step": 11045, + "teacher_loss": 0.27612221240997314 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.6370681524276733, + "learning_rate": 2.7162696983699988e-05, + "loss": 0.3435, + "step": 11046, + "teacher_loss": 0.31082576513290405 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.29654231667518616, + "learning_rate": 2.7161367575091217e-05, + "loss": 0.2228, + "step": 11047, + "teacher_loss": 0.21461719274520874 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.3899002969264984, + "learning_rate": 2.7160037887658743e-05, + "loss": 0.2917, + "step": 11048, + "teacher_loss": 0.280734121799469 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.15593791007995605, + "learning_rate": 2.715870792143307e-05, + "loss": 0.1782, + "step": 11049, + "teacher_loss": 0.18067193031311035 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.7147342562675476, + "learning_rate": 2.7157377676444664e-05, + "loss": 0.3088, + "step": 11050, + "teacher_loss": 0.26368898153305054 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.25797319412231445, + "learning_rate": 2.7156047152724046e-05, + "loss": 0.2281, + "step": 11051, + "teacher_loss": 0.22473329305648804 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.7149232625961304, + "learning_rate": 2.715471635030171e-05, + "loss": 0.3682, + "step": 11052, + "teacher_loss": 0.3297004997730255 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.3798366189002991, + "learning_rate": 2.715338526920817e-05, + "loss": 0.2177, + "step": 11053, + "teacher_loss": 0.19966942071914673 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.36372238397598267, + "learning_rate": 2.7152053909473945e-05, + "loss": 0.2595, + "step": 11054, + "teacher_loss": 0.24791128933429718 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.1740797460079193, + "learning_rate": 2.715072227112956e-05, + "loss": 0.2234, + "step": 11055, + "teacher_loss": 0.2288346290588379 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.2681766450405121, + "learning_rate": 2.7149390354205533e-05, + "loss": 0.2768, + "step": 11056, + "teacher_loss": 0.277803897857666 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.4045276939868927, + "learning_rate": 2.7148058158732423e-05, + "loss": 0.2587, + "step": 11057, + "teacher_loss": 0.24251475930213928 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.18611067533493042, + "learning_rate": 2.7146725684740754e-05, + "loss": 0.2105, + "step": 11058, + "teacher_loss": 0.2132633924484253 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.7857559323310852, + "learning_rate": 2.7145392932261085e-05, + "loss": 0.3207, + "step": 11059, + "teacher_loss": 0.2689828872680664 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.35701483488082886, + "learning_rate": 2.714405990132397e-05, + "loss": 0.2309, + "step": 11060, + "teacher_loss": 0.21685263514518738 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.35951870679855347, + "learning_rate": 2.7142726591959968e-05, + "loss": 0.1975, + "step": 11061, + "teacher_loss": 0.17951303720474243 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.5068052411079407, + "learning_rate": 2.7141393004199652e-05, + "loss": 0.1986, + "step": 11062, + "teacher_loss": 0.16437765955924988 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.558967113494873, + "learning_rate": 2.71400591380736e-05, + "loss": 0.2751, + "step": 11063, + "teacher_loss": 0.243523508310318 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.9196529388427734, + "learning_rate": 2.7138724993612386e-05, + "loss": 0.2789, + "step": 11064, + "teacher_loss": 0.20772764086723328 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.3080746531486511, + "learning_rate": 2.7137390570846608e-05, + "loss": 0.2311, + "step": 11065, + "teacher_loss": 0.22255653142929077 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.4238111078739166, + "learning_rate": 2.7136055869806847e-05, + "loss": 0.278, + "step": 11066, + "teacher_loss": 0.26175373792648315 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.12689395248889923, + "learning_rate": 2.7134720890523713e-05, + "loss": 0.1946, + "step": 11067, + "teacher_loss": 0.20208588242530823 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.1391545832157135, + "learning_rate": 2.713338563302781e-05, + "loss": 0.156, + "step": 11068, + "teacher_loss": 0.15789633989334106 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.4829387068748474, + "learning_rate": 2.713205009734975e-05, + "loss": 0.3892, + "step": 11069, + "teacher_loss": 0.37878715991973877 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.4808875024318695, + "learning_rate": 2.713071428352016e-05, + "loss": 0.2457, + "step": 11070, + "teacher_loss": 0.21954245865345 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.1887083202600479, + "learning_rate": 2.712937819156966e-05, + "loss": 0.2362, + "step": 11071, + "teacher_loss": 0.24152205884456635 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.6208519339561462, + "learning_rate": 2.7128041821528884e-05, + "loss": 0.4896, + "step": 11072, + "teacher_loss": 0.4749906659126282 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.34722474217414856, + "learning_rate": 2.7126705173428467e-05, + "loss": 0.2354, + "step": 11073, + "teacher_loss": 0.22301414608955383 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.9872570037841797, + "learning_rate": 2.712536824729906e-05, + "loss": 0.3112, + "step": 11074, + "teacher_loss": 0.23612657189369202 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.4689207673072815, + "learning_rate": 2.712403104317132e-05, + "loss": 0.2822, + "step": 11075, + "teacher_loss": 0.261398583650589 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.6537783145904541, + "learning_rate": 2.712269356107589e-05, + "loss": 0.283, + "step": 11076, + "teacher_loss": 0.24180655181407928 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.5218496322631836, + "learning_rate": 2.7121355801043444e-05, + "loss": 0.2022, + "step": 11077, + "teacher_loss": 0.1666392832994461 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.25220993161201477, + "learning_rate": 2.7120017763104648e-05, + "loss": 0.2699, + "step": 11078, + "teacher_loss": 0.2718440294265747 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.26389414072036743, + "learning_rate": 2.711867944729019e-05, + "loss": 0.1613, + "step": 11079, + "teacher_loss": 0.1499488800764084 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.6802481412887573, + "learning_rate": 2.711734085363074e-05, + "loss": 0.3652, + "step": 11080, + "teacher_loss": 0.3302406072616577 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.40174198150634766, + "learning_rate": 2.7116001982156995e-05, + "loss": 0.2603, + "step": 11081, + "teacher_loss": 0.24454142153263092 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.267318457365036, + "learning_rate": 2.711466283289965e-05, + "loss": 0.2349, + "step": 11082, + "teacher_loss": 0.2312580794095993 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.5143426060676575, + "learning_rate": 2.7113323405889406e-05, + "loss": 0.3014, + "step": 11083, + "teacher_loss": 0.27775144577026367 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 1.008647084236145, + "learning_rate": 2.7111983701156978e-05, + "loss": 0.4198, + "step": 11084, + "teacher_loss": 0.3543638586997986 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.6783154606819153, + "learning_rate": 2.7110643718733075e-05, + "loss": 0.2506, + "step": 11085, + "teacher_loss": 0.2030808925628662 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.4924120306968689, + "learning_rate": 2.7109303458648422e-05, + "loss": 0.3429, + "step": 11086, + "teacher_loss": 0.3262344002723694 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.28460320830345154, + "learning_rate": 2.7107962920933748e-05, + "loss": 0.1657, + "step": 11087, + "teacher_loss": 0.15247558057308197 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.18472126126289368, + "learning_rate": 2.7106622105619787e-05, + "loss": 0.1922, + "step": 11088, + "teacher_loss": 0.19304703176021576 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.20212475955486298, + "learning_rate": 2.7105281012737272e-05, + "loss": 0.1817, + "step": 11089, + "teacher_loss": 0.17938190698623657 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.3637077212333679, + "learning_rate": 2.7103939642316963e-05, + "loss": 0.3078, + "step": 11090, + "teacher_loss": 0.30162686109542847 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.4526901841163635, + "learning_rate": 2.7102597994389604e-05, + "loss": 0.2903, + "step": 11091, + "teacher_loss": 0.2723066210746765 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.5648419857025146, + "learning_rate": 2.7101256068985955e-05, + "loss": 0.2764, + "step": 11092, + "teacher_loss": 0.2443307489156723 + }, + { + "compression_loss": 0.0, + "epoch": 2.0, + "label_loss": 0.22369495034217834, + "learning_rate": 2.7099913866136793e-05, + "loss": 0.1419, + "step": 11093, + "teacher_loss": 0.132841557264328 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.4675179123878479, + "learning_rate": 2.7098571385872882e-05, + "loss": 0.2337, + "step": 11094, + "teacher_loss": 0.20773647725582123 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.6255444288253784, + "learning_rate": 2.7097228628225e-05, + "loss": 0.3842, + "step": 11095, + "teacher_loss": 0.35737138986587524 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.3477962017059326, + "learning_rate": 2.7095885593223934e-05, + "loss": 0.2113, + "step": 11096, + "teacher_loss": 0.1961199939250946 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.5160927176475525, + "learning_rate": 2.7094542280900477e-05, + "loss": 0.2901, + "step": 11097, + "teacher_loss": 0.26497629284858704 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.4052009582519531, + "learning_rate": 2.7093198691285433e-05, + "loss": 0.2761, + "step": 11098, + "teacher_loss": 0.26171940565109253 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.17650824785232544, + "learning_rate": 2.70918548244096e-05, + "loss": 0.2767, + "step": 11099, + "teacher_loss": 0.28787118196487427 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.5297001600265503, + "learning_rate": 2.709051068030378e-05, + "loss": 0.2931, + "step": 11100, + "teacher_loss": 0.2667995095252991 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.9438211917877197, + "learning_rate": 2.7089166258998808e-05, + "loss": 0.725, + "step": 11101, + "teacher_loss": 0.7007371187210083 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.213974267244339, + "learning_rate": 2.7087821560525492e-05, + "loss": 0.2452, + "step": 11102, + "teacher_loss": 0.24863901734352112 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.7154766321182251, + "learning_rate": 2.7086476584914675e-05, + "loss": 0.3544, + "step": 11103, + "teacher_loss": 0.31429457664489746 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.14646224677562714, + "learning_rate": 2.708513133219718e-05, + "loss": 0.1612, + "step": 11104, + "teacher_loss": 0.16288727521896362 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.7129599452018738, + "learning_rate": 2.708378580240386e-05, + "loss": 0.5867, + "step": 11105, + "teacher_loss": 0.5726842284202576 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.7975394129753113, + "learning_rate": 2.7082439995565565e-05, + "loss": 0.2192, + "step": 11106, + "teacher_loss": 0.15496890246868134 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.20353181660175323, + "learning_rate": 2.708109391171314e-05, + "loss": 0.2028, + "step": 11107, + "teacher_loss": 0.2027340829372406 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.33372199535369873, + "learning_rate": 2.7079747550877455e-05, + "loss": 0.2718, + "step": 11108, + "teacher_loss": 0.26488804817199707 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.2949153780937195, + "learning_rate": 2.7078400913089376e-05, + "loss": 0.2564, + "step": 11109, + "teacher_loss": 0.25209251046180725 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.6103252172470093, + "learning_rate": 2.7077053998379778e-05, + "loss": 0.4434, + "step": 11110, + "teacher_loss": 0.42488181591033936 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 1.1178202629089355, + "learning_rate": 2.707570680677954e-05, + "loss": 0.3193, + "step": 11111, + "teacher_loss": 0.23062273859977722 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.5445696115493774, + "learning_rate": 2.707435933831955e-05, + "loss": 0.3803, + "step": 11112, + "teacher_loss": 0.3620012402534485 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.6099430322647095, + "learning_rate": 2.7073011593030697e-05, + "loss": 0.385, + "step": 11113, + "teacher_loss": 0.3600549101829529 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.5297979712486267, + "learning_rate": 2.707166357094389e-05, + "loss": 0.2802, + "step": 11114, + "teacher_loss": 0.2524702250957489 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.540873110294342, + "learning_rate": 2.7070315272090027e-05, + "loss": 0.2573, + "step": 11115, + "teacher_loss": 0.22574713826179504 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.33703333139419556, + "learning_rate": 2.7068966696500025e-05, + "loss": 0.2914, + "step": 11116, + "teacher_loss": 0.28633999824523926 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.17114807665348053, + "learning_rate": 2.70676178442048e-05, + "loss": 0.1954, + "step": 11117, + "teacher_loss": 0.19810548424720764 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.7181960344314575, + "learning_rate": 2.706626871523528e-05, + "loss": 0.2806, + "step": 11118, + "teacher_loss": 0.231967955827713 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.2218027412891388, + "learning_rate": 2.7064919309622395e-05, + "loss": 0.2652, + "step": 11119, + "teacher_loss": 0.2700613737106323 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.3960804045200348, + "learning_rate": 2.7063569627397082e-05, + "loss": 0.2457, + "step": 11120, + "teacher_loss": 0.2289600670337677 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.22098730504512787, + "learning_rate": 2.706221966859029e-05, + "loss": 0.2429, + "step": 11121, + "teacher_loss": 0.24536684155464172 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.7551677823066711, + "learning_rate": 2.7060869433232957e-05, + "loss": 0.4162, + "step": 11122, + "teacher_loss": 0.3785792589187622 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.8047893047332764, + "learning_rate": 2.7059518921356053e-05, + "loss": 0.5408, + "step": 11123, + "teacher_loss": 0.5114735960960388 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.33568909764289856, + "learning_rate": 2.7058168132990536e-05, + "loss": 0.2191, + "step": 11124, + "teacher_loss": 0.2061665952205658 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.2605346739292145, + "learning_rate": 2.7056817068167377e-05, + "loss": 0.2017, + "step": 11125, + "teacher_loss": 0.19519099593162537 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.516914427280426, + "learning_rate": 2.705546572691755e-05, + "loss": 0.3492, + "step": 11126, + "teacher_loss": 0.3305789828300476 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.2928575277328491, + "learning_rate": 2.705411410927204e-05, + "loss": 0.3136, + "step": 11127, + "teacher_loss": 0.3159273862838745 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.39623790979385376, + "learning_rate": 2.7052762215261828e-05, + "loss": 0.2864, + "step": 11128, + "teacher_loss": 0.2741583585739136 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.3782995939254761, + "learning_rate": 2.705141004491792e-05, + "loss": 0.2384, + "step": 11129, + "teacher_loss": 0.22285538911819458 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.28563278913497925, + "learning_rate": 2.705005759827131e-05, + "loss": 0.2166, + "step": 11130, + "teacher_loss": 0.20897230505943298 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.44995877146720886, + "learning_rate": 2.7048704875353004e-05, + "loss": 0.3323, + "step": 11131, + "teacher_loss": 0.31926870346069336 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.3307040333747864, + "learning_rate": 2.7047351876194022e-05, + "loss": 0.2117, + "step": 11132, + "teacher_loss": 0.19846510887145996 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.42214131355285645, + "learning_rate": 2.7045998600825382e-05, + "loss": 0.2702, + "step": 11133, + "teacher_loss": 0.25335264205932617 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.6219674348831177, + "learning_rate": 2.7044645049278112e-05, + "loss": 0.519, + "step": 11134, + "teacher_loss": 0.5075709223747253 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.2580287456512451, + "learning_rate": 2.7043291221583237e-05, + "loss": 0.2115, + "step": 11135, + "teacher_loss": 0.20631955564022064 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.11969958990812302, + "learning_rate": 2.7041937117771807e-05, + "loss": 0.152, + "step": 11136, + "teacher_loss": 0.15562891960144043 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.9449215531349182, + "learning_rate": 2.7040582737874857e-05, + "loss": 0.6072, + "step": 11137, + "teacher_loss": 0.5696402788162231 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.6992002725601196, + "learning_rate": 2.7039228081923448e-05, + "loss": 0.3539, + "step": 11138, + "teacher_loss": 0.3154977560043335 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.3310987949371338, + "learning_rate": 2.7037873149948637e-05, + "loss": 0.4145, + "step": 11139, + "teacher_loss": 0.4237608313560486 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.2851220369338989, + "learning_rate": 2.7036517941981486e-05, + "loss": 0.2197, + "step": 11140, + "teacher_loss": 0.2124020904302597 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.835666835308075, + "learning_rate": 2.7035162458053057e-05, + "loss": 0.5865, + "step": 11141, + "teacher_loss": 0.558761715888977 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.26387423276901245, + "learning_rate": 2.7033806698194444e-05, + "loss": 0.222, + "step": 11142, + "teacher_loss": 0.21738894283771515 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.501766562461853, + "learning_rate": 2.703245066243672e-05, + "loss": 0.3029, + "step": 11143, + "teacher_loss": 0.28080326318740845 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.2797282338142395, + "learning_rate": 2.703109435081098e-05, + "loss": 0.1707, + "step": 11144, + "teacher_loss": 0.1586271971464157 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.5871778726577759, + "learning_rate": 2.7029737763348316e-05, + "loss": 0.477, + "step": 11145, + "teacher_loss": 0.4648021161556244 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.4689687490463257, + "learning_rate": 2.702838090007983e-05, + "loss": 0.2785, + "step": 11146, + "teacher_loss": 0.2573047876358032 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.6790783405303955, + "learning_rate": 2.702702376103664e-05, + "loss": 0.3453, + "step": 11147, + "teacher_loss": 0.3082126975059509 + }, + { + "compression_loss": 0.0, + "epoch": 2.01, + "label_loss": 0.5030022859573364, + "learning_rate": 2.7025666346249845e-05, + "loss": 0.2926, + "step": 11148, + "teacher_loss": 0.26927411556243896 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.2587215006351471, + "learning_rate": 2.702430865575058e-05, + "loss": 0.2026, + "step": 11149, + "teacher_loss": 0.19634869694709778 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.2933503985404968, + "learning_rate": 2.7022950689569968e-05, + "loss": 0.2044, + "step": 11150, + "teacher_loss": 0.19447465240955353 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.20631949603557587, + "learning_rate": 2.7021592447739143e-05, + "loss": 0.183, + "step": 11151, + "teacher_loss": 0.18043893575668335 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.4691442847251892, + "learning_rate": 2.702023393028925e-05, + "loss": 0.2829, + "step": 11152, + "teacher_loss": 0.2621912360191345 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.5620827674865723, + "learning_rate": 2.7018875137251424e-05, + "loss": 0.4542, + "step": 11153, + "teacher_loss": 0.442263126373291 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.4237462282180786, + "learning_rate": 2.7017516068656832e-05, + "loss": 0.4438, + "step": 11154, + "teacher_loss": 0.44599688053131104 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.10700087249279022, + "learning_rate": 2.7016156724536625e-05, + "loss": 0.1523, + "step": 11155, + "teacher_loss": 0.15731433033943176 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.3748167157173157, + "learning_rate": 2.7014797104921967e-05, + "loss": 0.2594, + "step": 11156, + "teacher_loss": 0.24663017690181732 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.6609557867050171, + "learning_rate": 2.701343720984404e-05, + "loss": 0.2679, + "step": 11157, + "teacher_loss": 0.22418633103370667 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 1.7564237117767334, + "learning_rate": 2.701207703933401e-05, + "loss": 0.4517, + "step": 11158, + "teacher_loss": 0.30674880743026733 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.40274733304977417, + "learning_rate": 2.701071659342307e-05, + "loss": 0.1648, + "step": 11159, + "teacher_loss": 0.1384095549583435 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.8992514610290527, + "learning_rate": 2.700935587214241e-05, + "loss": 0.3257, + "step": 11160, + "teacher_loss": 0.2620210349559784 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.35065388679504395, + "learning_rate": 2.7007994875523222e-05, + "loss": 0.2666, + "step": 11161, + "teacher_loss": 0.25723132491111755 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.6221776604652405, + "learning_rate": 2.7006633603596712e-05, + "loss": 0.2474, + "step": 11162, + "teacher_loss": 0.20573300123214722 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.5098869204521179, + "learning_rate": 2.7005272056394096e-05, + "loss": 0.2631, + "step": 11163, + "teacher_loss": 0.23568841814994812 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.40784379839897156, + "learning_rate": 2.7003910233946588e-05, + "loss": 0.385, + "step": 11164, + "teacher_loss": 0.38241130113601685 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.3961941599845886, + "learning_rate": 2.70025481362854e-05, + "loss": 0.2764, + "step": 11165, + "teacher_loss": 0.263124018907547 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.17299503087997437, + "learning_rate": 2.7001185763441773e-05, + "loss": 0.2039, + "step": 11166, + "teacher_loss": 0.2072961926460266 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.36186981201171875, + "learning_rate": 2.699982311544694e-05, + "loss": 0.3029, + "step": 11167, + "teacher_loss": 0.296355664730072 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.18529130518436432, + "learning_rate": 2.699846019233214e-05, + "loss": 0.1953, + "step": 11168, + "teacher_loss": 0.1963636577129364 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.6550931930541992, + "learning_rate": 2.6997096994128616e-05, + "loss": 0.2809, + "step": 11169, + "teacher_loss": 0.23934370279312134 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.6422407627105713, + "learning_rate": 2.699573352086763e-05, + "loss": 0.2547, + "step": 11170, + "teacher_loss": 0.2115880697965622 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.37764179706573486, + "learning_rate": 2.699436977258044e-05, + "loss": 0.2333, + "step": 11171, + "teacher_loss": 0.21725022792816162 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.5187445282936096, + "learning_rate": 2.699300574929831e-05, + "loss": 0.1997, + "step": 11172, + "teacher_loss": 0.1642317771911621 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.23255422711372375, + "learning_rate": 2.699164145105252e-05, + "loss": 0.2217, + "step": 11173, + "teacher_loss": 0.22048154473304749 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.3505420684814453, + "learning_rate": 2.699027687787434e-05, + "loss": 0.2619, + "step": 11174, + "teacher_loss": 0.2520436644554138 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.5439131855964661, + "learning_rate": 2.698891202979506e-05, + "loss": 0.2504, + "step": 11175, + "teacher_loss": 0.2177654504776001 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.49192696809768677, + "learning_rate": 2.698754690684597e-05, + "loss": 0.25, + "step": 11176, + "teacher_loss": 0.22315248847007751 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.7508048415184021, + "learning_rate": 2.6986181509058376e-05, + "loss": 0.4346, + "step": 11177, + "teacher_loss": 0.39951199293136597 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.9282213449478149, + "learning_rate": 2.6984815836463572e-05, + "loss": 0.3638, + "step": 11178, + "teacher_loss": 0.30112171173095703 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.9537012577056885, + "learning_rate": 2.6983449889092874e-05, + "loss": 0.4629, + "step": 11179, + "teacher_loss": 0.40836164355278015 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.5213973522186279, + "learning_rate": 2.69820836669776e-05, + "loss": 0.3205, + "step": 11180, + "teacher_loss": 0.2982255816459656 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.6989810466766357, + "learning_rate": 2.698071717014907e-05, + "loss": 0.3349, + "step": 11181, + "teacher_loss": 0.29449018836021423 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.3838341236114502, + "learning_rate": 2.6979350398638616e-05, + "loss": 0.2182, + "step": 11182, + "teacher_loss": 0.19980354607105255 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.5215386748313904, + "learning_rate": 2.6977983352477574e-05, + "loss": 0.2612, + "step": 11183, + "teacher_loss": 0.23231905698776245 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.20742267370224, + "learning_rate": 2.697661603169728e-05, + "loss": 0.1814, + "step": 11184, + "teacher_loss": 0.17850598692893982 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.5252453088760376, + "learning_rate": 2.6975248436329097e-05, + "loss": 0.2202, + "step": 11185, + "teacher_loss": 0.1863296627998352 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.4949405789375305, + "learning_rate": 2.6973880566404364e-05, + "loss": 0.3584, + "step": 11186, + "teacher_loss": 0.3432287573814392 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.5451057553291321, + "learning_rate": 2.6972512421954453e-05, + "loss": 0.2823, + "step": 11187, + "teacher_loss": 0.25311341881752014 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.5243842601776123, + "learning_rate": 2.6971144003010725e-05, + "loss": 0.558, + "step": 11188, + "teacher_loss": 0.5617440938949585 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.27836930751800537, + "learning_rate": 2.6969775309604558e-05, + "loss": 0.2803, + "step": 11189, + "teacher_loss": 0.2805447578430176 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.555506706237793, + "learning_rate": 2.696840634176733e-05, + "loss": 0.3163, + "step": 11190, + "teacher_loss": 0.28967010974884033 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.21295493841171265, + "learning_rate": 2.696703709953043e-05, + "loss": 0.1706, + "step": 11191, + "teacher_loss": 0.16592423617839813 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.6925020217895508, + "learning_rate": 2.6965667582925247e-05, + "loss": 0.3616, + "step": 11192, + "teacher_loss": 0.3248024582862854 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.3998020589351654, + "learning_rate": 2.696429779198318e-05, + "loss": 0.2529, + "step": 11193, + "teacher_loss": 0.23656976222991943 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.6629601716995239, + "learning_rate": 2.6962927726735637e-05, + "loss": 0.3501, + "step": 11194, + "teacher_loss": 0.31535208225250244 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.25233015418052673, + "learning_rate": 2.6961557387214022e-05, + "loss": 0.3444, + "step": 11195, + "teacher_loss": 0.3546769917011261 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.670173704624176, + "learning_rate": 2.6960186773449767e-05, + "loss": 0.209, + "step": 11196, + "teacher_loss": 0.15775738656520844 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.3421512544155121, + "learning_rate": 2.6958815885474285e-05, + "loss": 0.197, + "step": 11197, + "teacher_loss": 0.18090704083442688 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.3140241205692291, + "learning_rate": 2.6957444723319005e-05, + "loss": 0.3106, + "step": 11198, + "teacher_loss": 0.3102579116821289 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.6438047885894775, + "learning_rate": 2.6956073287015373e-05, + "loss": 0.279, + "step": 11199, + "teacher_loss": 0.2384609878063202 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.5537392497062683, + "learning_rate": 2.6954701576594827e-05, + "loss": 0.2588, + "step": 11200, + "teacher_loss": 0.22598521411418915 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.28569895029067993, + "learning_rate": 2.695332959208881e-05, + "loss": 0.232, + "step": 11201, + "teacher_loss": 0.2260473668575287 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.5608729124069214, + "learning_rate": 2.695195733352879e-05, + "loss": 0.2621, + "step": 11202, + "teacher_loss": 0.2289191484451294 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.29268136620521545, + "learning_rate": 2.695058480094622e-05, + "loss": 0.2642, + "step": 11203, + "teacher_loss": 0.2610268294811249 + }, + { + "compression_loss": 0.0, + "epoch": 2.02, + "label_loss": 0.8299776911735535, + "learning_rate": 2.6949211994372566e-05, + "loss": 0.3556, + "step": 11204, + "teacher_loss": 0.3028719127178192 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.461181640625, + "learning_rate": 2.6947838913839314e-05, + "loss": 0.373, + "step": 11205, + "teacher_loss": 0.3632575571537018 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.9282543063163757, + "learning_rate": 2.6946465559377934e-05, + "loss": 0.3246, + "step": 11206, + "teacher_loss": 0.25754567980766296 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.5561931133270264, + "learning_rate": 2.6945091931019916e-05, + "loss": 0.3541, + "step": 11207, + "teacher_loss": 0.33169811964035034 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.3426342010498047, + "learning_rate": 2.6943718028796752e-05, + "loss": 0.2104, + "step": 11208, + "teacher_loss": 0.1957625299692154 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.6001682281494141, + "learning_rate": 2.6942343852739942e-05, + "loss": 0.3417, + "step": 11209, + "teacher_loss": 0.3130132555961609 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.7260804772377014, + "learning_rate": 2.6940969402880993e-05, + "loss": 0.246, + "step": 11210, + "teacher_loss": 0.19261445105075836 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.4389238953590393, + "learning_rate": 2.693959467925142e-05, + "loss": 0.2032, + "step": 11211, + "teacher_loss": 0.17700113356113434 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.5990551710128784, + "learning_rate": 2.6938219681882733e-05, + "loss": 0.3283, + "step": 11212, + "teacher_loss": 0.29823437333106995 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.27416127920150757, + "learning_rate": 2.6936844410806463e-05, + "loss": 0.2293, + "step": 11213, + "teacher_loss": 0.22437047958374023 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.45993781089782715, + "learning_rate": 2.6935468866054142e-05, + "loss": 0.3432, + "step": 11214, + "teacher_loss": 0.33028265833854675 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.9485346078872681, + "learning_rate": 2.69340930476573e-05, + "loss": 0.3582, + "step": 11215, + "teacher_loss": 0.29263997077941895 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.334402859210968, + "learning_rate": 2.693271695564749e-05, + "loss": 0.1993, + "step": 11216, + "teacher_loss": 0.184321328997612 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.3555179834365845, + "learning_rate": 2.693134059005625e-05, + "loss": 0.2669, + "step": 11217, + "teacher_loss": 0.2570229768753052 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.2551718056201935, + "learning_rate": 2.692996395091515e-05, + "loss": 0.2988, + "step": 11218, + "teacher_loss": 0.30368751287460327 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.724045991897583, + "learning_rate": 2.6928587038255734e-05, + "loss": 0.3357, + "step": 11219, + "teacher_loss": 0.29252979159355164 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.5547541379928589, + "learning_rate": 2.692720985210959e-05, + "loss": 0.2407, + "step": 11220, + "teacher_loss": 0.20585399866104126 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.4354609251022339, + "learning_rate": 2.692583239250828e-05, + "loss": 0.2408, + "step": 11221, + "teacher_loss": 0.21916991472244263 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.27566632628440857, + "learning_rate": 2.692445465948339e-05, + "loss": 0.2416, + "step": 11222, + "teacher_loss": 0.2377694696187973 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.5918866395950317, + "learning_rate": 2.6923076653066503e-05, + "loss": 0.3038, + "step": 11223, + "teacher_loss": 0.27177971601486206 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.5092679262161255, + "learning_rate": 2.6921698373289217e-05, + "loss": 0.2093, + "step": 11224, + "teacher_loss": 0.1759694218635559 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.21625277400016785, + "learning_rate": 2.692031982018313e-05, + "loss": 0.2248, + "step": 11225, + "teacher_loss": 0.22579748928546906 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.39595478773117065, + "learning_rate": 2.691894099377985e-05, + "loss": 0.2129, + "step": 11226, + "teacher_loss": 0.1925710290670395 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.41937726736068726, + "learning_rate": 2.691756189411099e-05, + "loss": 0.389, + "step": 11227, + "teacher_loss": 0.38563358783721924 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.8448793888092041, + "learning_rate": 2.6916182521208164e-05, + "loss": 0.3935, + "step": 11228, + "teacher_loss": 0.3433764576911926 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.18278658390045166, + "learning_rate": 2.6914802875103003e-05, + "loss": 0.192, + "step": 11229, + "teacher_loss": 0.1929853856563568 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.27039605379104614, + "learning_rate": 2.691342295582713e-05, + "loss": 0.2216, + "step": 11230, + "teacher_loss": 0.21617646515369415 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.3692094683647156, + "learning_rate": 2.6912042763412185e-05, + "loss": 0.1878, + "step": 11231, + "teacher_loss": 0.16764050722122192 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.265606164932251, + "learning_rate": 2.6910662297889818e-05, + "loss": 0.1987, + "step": 11232, + "teacher_loss": 0.1912379264831543 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.3151041269302368, + "learning_rate": 2.6909281559291672e-05, + "loss": 0.2104, + "step": 11233, + "teacher_loss": 0.19877415895462036 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.28362464904785156, + "learning_rate": 2.6907900547649406e-05, + "loss": 0.2001, + "step": 11234, + "teacher_loss": 0.1907707005739212 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.4875852167606354, + "learning_rate": 2.6906519262994683e-05, + "loss": 0.3403, + "step": 11235, + "teacher_loss": 0.32390546798706055 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.3244495987892151, + "learning_rate": 2.690513770535917e-05, + "loss": 0.2258, + "step": 11236, + "teacher_loss": 0.21487928926944733 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.23239167034626007, + "learning_rate": 2.690375587477455e-05, + "loss": 0.2147, + "step": 11237, + "teacher_loss": 0.21272554993629456 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.33954235911369324, + "learning_rate": 2.690237377127249e-05, + "loss": 0.1878, + "step": 11238, + "teacher_loss": 0.1709732711315155 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.42640507221221924, + "learning_rate": 2.690099139488468e-05, + "loss": 0.2295, + "step": 11239, + "teacher_loss": 0.2075849324464798 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.25878241658210754, + "learning_rate": 2.6899608745642823e-05, + "loss": 0.1851, + "step": 11240, + "teacher_loss": 0.17694270610809326 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.5628886222839355, + "learning_rate": 2.6898225823578616e-05, + "loss": 0.2964, + "step": 11241, + "teacher_loss": 0.26674625277519226 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.5313345193862915, + "learning_rate": 2.6896842628723766e-05, + "loss": 0.2694, + "step": 11242, + "teacher_loss": 0.24032220244407654 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.2580585777759552, + "learning_rate": 2.6895459161109978e-05, + "loss": 0.2269, + "step": 11243, + "teacher_loss": 0.22342915832996368 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.3754185438156128, + "learning_rate": 2.689407542076898e-05, + "loss": 0.2921, + "step": 11244, + "teacher_loss": 0.2828599214553833 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.29227176308631897, + "learning_rate": 2.689269140773249e-05, + "loss": 0.24, + "step": 11245, + "teacher_loss": 0.23417061567306519 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.370419442653656, + "learning_rate": 2.689130712203224e-05, + "loss": 0.2153, + "step": 11246, + "teacher_loss": 0.19805026054382324 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.20756079256534576, + "learning_rate": 2.688992256369997e-05, + "loss": 0.2225, + "step": 11247, + "teacher_loss": 0.22415073215961456 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.2742537260055542, + "learning_rate": 2.688853773276743e-05, + "loss": 0.2119, + "step": 11248, + "teacher_loss": 0.2049185335636139 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.8833494186401367, + "learning_rate": 2.6887152629266354e-05, + "loss": 0.5107, + "step": 11249, + "teacher_loss": 0.4692646265029907 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.5896503925323486, + "learning_rate": 2.6885767253228515e-05, + "loss": 0.3619, + "step": 11250, + "teacher_loss": 0.33654725551605225 + }, + { + "epoch": 2.03, + "eval_exact_match": 79.30936613055819, + "eval_f1": 86.97385439302845, + "step": 11250 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.33625927567481995, + "learning_rate": 2.688438160468567e-05, + "loss": 0.3439, + "step": 11251, + "teacher_loss": 0.34474360942840576 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.45378538966178894, + "learning_rate": 2.688299568366958e-05, + "loss": 0.3777, + "step": 11252, + "teacher_loss": 0.3692609965801239 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.03842146694660187, + "learning_rate": 2.688160949021203e-05, + "loss": 0.1806, + "step": 11253, + "teacher_loss": 0.19644439220428467 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.5745305418968201, + "learning_rate": 2.6880223024344798e-05, + "loss": 0.3926, + "step": 11254, + "teacher_loss": 0.37234410643577576 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.37490731477737427, + "learning_rate": 2.6878836286099665e-05, + "loss": 0.2191, + "step": 11255, + "teacher_loss": 0.20183223485946655 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.6139047145843506, + "learning_rate": 2.6877449275508435e-05, + "loss": 0.3365, + "step": 11256, + "teacher_loss": 0.30570706725120544 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.7160766124725342, + "learning_rate": 2.6876061992602903e-05, + "loss": 0.9038, + "step": 11257, + "teacher_loss": 0.9247101545333862 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.6901978254318237, + "learning_rate": 2.6874674437414876e-05, + "loss": 0.3751, + "step": 11258, + "teacher_loss": 0.34007206559181213 + }, + { + "compression_loss": 0.0, + "epoch": 2.03, + "label_loss": 0.3208482265472412, + "learning_rate": 2.6873286609976165e-05, + "loss": 0.2961, + "step": 11259, + "teacher_loss": 0.293396532535553 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.4595295488834381, + "learning_rate": 2.6871898510318588e-05, + "loss": 0.5129, + "step": 11260, + "teacher_loss": 0.5188044309616089 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.24841484427452087, + "learning_rate": 2.6870510138473977e-05, + "loss": 0.1989, + "step": 11261, + "teacher_loss": 0.19337168335914612 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.6570264101028442, + "learning_rate": 2.6869121494474152e-05, + "loss": 0.2866, + "step": 11262, + "teacher_loss": 0.24548864364624023 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.5054397583007812, + "learning_rate": 2.686773257835096e-05, + "loss": 0.2242, + "step": 11263, + "teacher_loss": 0.1929645538330078 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.3450898230075836, + "learning_rate": 2.6866343390136245e-05, + "loss": 0.1972, + "step": 11264, + "teacher_loss": 0.18075133860111237 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.6953567862510681, + "learning_rate": 2.686495392986185e-05, + "loss": 0.3408, + "step": 11265, + "teacher_loss": 0.30141323804855347 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.6767400503158569, + "learning_rate": 2.686356419755963e-05, + "loss": 0.3771, + "step": 11266, + "teacher_loss": 0.3438029885292053 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.2951080799102783, + "learning_rate": 2.6862174193261457e-05, + "loss": 0.3589, + "step": 11267, + "teacher_loss": 0.3660429120063782 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.5776809453964233, + "learning_rate": 2.6860783916999196e-05, + "loss": 0.2764, + "step": 11268, + "teacher_loss": 0.2429770529270172 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.5957028865814209, + "learning_rate": 2.6859393368804718e-05, + "loss": 0.3518, + "step": 11269, + "teacher_loss": 0.3247438073158264 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.23803962767124176, + "learning_rate": 2.6858002548709903e-05, + "loss": 0.2108, + "step": 11270, + "teacher_loss": 0.20778590440750122 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.629150390625, + "learning_rate": 2.6856611456746647e-05, + "loss": 0.285, + "step": 11271, + "teacher_loss": 0.24671012163162231 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.3847843110561371, + "learning_rate": 2.6855220092946834e-05, + "loss": 0.2207, + "step": 11272, + "teacher_loss": 0.20245277881622314 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.5425306558609009, + "learning_rate": 2.6853828457342372e-05, + "loss": 0.3445, + "step": 11273, + "teacher_loss": 0.32247036695480347 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.3355640172958374, + "learning_rate": 2.685243654996516e-05, + "loss": 0.2184, + "step": 11274, + "teacher_loss": 0.20534522831439972 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.31646233797073364, + "learning_rate": 2.6851044370847116e-05, + "loss": 0.2175, + "step": 11275, + "teacher_loss": 0.20650577545166016 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.23984137177467346, + "learning_rate": 2.6849651920020154e-05, + "loss": 0.1868, + "step": 11276, + "teacher_loss": 0.18094469606876373 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.2742348313331604, + "learning_rate": 2.6848259197516197e-05, + "loss": 0.2198, + "step": 11277, + "teacher_loss": 0.21380004286766052 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.4496724605560303, + "learning_rate": 2.6846866203367185e-05, + "loss": 0.2501, + "step": 11278, + "teacher_loss": 0.22789807617664337 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.4974231719970703, + "learning_rate": 2.684547293760505e-05, + "loss": 0.2596, + "step": 11279, + "teacher_loss": 0.23316732048988342 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.7012125253677368, + "learning_rate": 2.6844079400261735e-05, + "loss": 0.354, + "step": 11280, + "teacher_loss": 0.31540751457214355 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.34276098012924194, + "learning_rate": 2.6842685591369187e-05, + "loss": 0.1788, + "step": 11281, + "teacher_loss": 0.16059181094169617 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.6426563262939453, + "learning_rate": 2.6841291510959363e-05, + "loss": 0.4463, + "step": 11282, + "teacher_loss": 0.42451998591423035 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.4309924244880676, + "learning_rate": 2.6839897159064228e-05, + "loss": 0.2688, + "step": 11283, + "teacher_loss": 0.25075024366378784 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.11389905214309692, + "learning_rate": 2.683850253571575e-05, + "loss": 0.1805, + "step": 11284, + "teacher_loss": 0.1879309117794037 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.8952006101608276, + "learning_rate": 2.6837107640945904e-05, + "loss": 0.2915, + "step": 11285, + "teacher_loss": 0.22444184124469757 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 1.0161534547805786, + "learning_rate": 2.6835712474786667e-05, + "loss": 0.9861, + "step": 11286, + "teacher_loss": 0.9827646613121033 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.4063742756843567, + "learning_rate": 2.6834317037270034e-05, + "loss": 0.371, + "step": 11287, + "teacher_loss": 0.36708134412765503 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.3163510859012604, + "learning_rate": 2.6832921328427985e-05, + "loss": 0.3928, + "step": 11288, + "teacher_loss": 0.4013279676437378 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.22254332900047302, + "learning_rate": 2.6831525348292532e-05, + "loss": 0.2131, + "step": 11289, + "teacher_loss": 0.2120695412158966 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.3451879620552063, + "learning_rate": 2.6830129096895672e-05, + "loss": 0.3024, + "step": 11290, + "teacher_loss": 0.29770058393478394 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.6977245807647705, + "learning_rate": 2.682873257426942e-05, + "loss": 0.2454, + "step": 11291, + "teacher_loss": 0.19514703750610352 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.25241440534591675, + "learning_rate": 2.68273357804458e-05, + "loss": 0.2084, + "step": 11292, + "teacher_loss": 0.2035626471042633 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.7038175463676453, + "learning_rate": 2.6825938715456825e-05, + "loss": 0.4133, + "step": 11293, + "teacher_loss": 0.38106220960617065 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.6573154926300049, + "learning_rate": 2.682454137933453e-05, + "loss": 0.3725, + "step": 11294, + "teacher_loss": 0.34083160758018494 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.29671037197113037, + "learning_rate": 2.6823143772110957e-05, + "loss": 0.2429, + "step": 11295, + "teacher_loss": 0.23690149188041687 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 1.121340274810791, + "learning_rate": 2.6821745893818145e-05, + "loss": 0.4853, + "step": 11296, + "teacher_loss": 0.414655864238739 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.6127495765686035, + "learning_rate": 2.6820347744488143e-05, + "loss": 0.3295, + "step": 11297, + "teacher_loss": 0.29807665944099426 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.2527361214160919, + "learning_rate": 2.6818949324153003e-05, + "loss": 0.145, + "step": 11298, + "teacher_loss": 0.13300520181655884 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.598783016204834, + "learning_rate": 2.6817550632844792e-05, + "loss": 0.3626, + "step": 11299, + "teacher_loss": 0.3363194465637207 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.2856517434120178, + "learning_rate": 2.6816151670595576e-05, + "loss": 0.2749, + "step": 11300, + "teacher_loss": 0.27373206615448 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.19810280203819275, + "learning_rate": 2.6814752437437428e-05, + "loss": 0.2439, + "step": 11301, + "teacher_loss": 0.2490319311618805 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.20336779952049255, + "learning_rate": 2.6813352933402432e-05, + "loss": 0.1824, + "step": 11302, + "teacher_loss": 0.18003855645656586 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.43892931938171387, + "learning_rate": 2.6811953158522668e-05, + "loss": 0.2904, + "step": 11303, + "teacher_loss": 0.2739505171775818 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.5004748702049255, + "learning_rate": 2.6810553112830235e-05, + "loss": 0.2635, + "step": 11304, + "teacher_loss": 0.23719993233680725 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.4722036123275757, + "learning_rate": 2.6809152796357225e-05, + "loss": 0.2712, + "step": 11305, + "teacher_loss": 0.24886423349380493 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.4347146153450012, + "learning_rate": 2.680775220913575e-05, + "loss": 0.1817, + "step": 11306, + "teacher_loss": 0.153592050075531 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.8150264620780945, + "learning_rate": 2.6806351351197923e-05, + "loss": 0.3914, + "step": 11307, + "teacher_loss": 0.34438472986221313 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.2456343173980713, + "learning_rate": 2.680495022257585e-05, + "loss": 0.1766, + "step": 11308, + "teacher_loss": 0.1689676195383072 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.3890421390533447, + "learning_rate": 2.6803548823301666e-05, + "loss": 0.2877, + "step": 11309, + "teacher_loss": 0.27645498514175415 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.7891196012496948, + "learning_rate": 2.6802147153407493e-05, + "loss": 0.3037, + "step": 11310, + "teacher_loss": 0.24971838295459747 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.25211894512176514, + "learning_rate": 2.680074521292547e-05, + "loss": 0.2279, + "step": 11311, + "teacher_loss": 0.22523453831672668 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.9736194014549255, + "learning_rate": 2.679934300188774e-05, + "loss": 0.4806, + "step": 11312, + "teacher_loss": 0.4257797300815582 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.71315997838974, + "learning_rate": 2.6797940520326453e-05, + "loss": 0.4972, + "step": 11313, + "teacher_loss": 0.47322559356689453 + }, + { + "compression_loss": 0.0, + "epoch": 2.04, + "label_loss": 0.5923274755477905, + "learning_rate": 2.679653776827376e-05, + "loss": 0.2821, + "step": 11314, + "teacher_loss": 0.2476511150598526 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.7088020443916321, + "learning_rate": 2.679513474576183e-05, + "loss": 0.3402, + "step": 11315, + "teacher_loss": 0.2992664575576782 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.8937879800796509, + "learning_rate": 2.679373145282282e-05, + "loss": 0.3111, + "step": 11316, + "teacher_loss": 0.2463269829750061 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.7397705316543579, + "learning_rate": 2.67923278894889e-05, + "loss": 0.2984, + "step": 11317, + "teacher_loss": 0.24934226274490356 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.4082082509994507, + "learning_rate": 2.6790924055792265e-05, + "loss": 0.2565, + "step": 11318, + "teacher_loss": 0.23964270949363708 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.34883540868759155, + "learning_rate": 2.6789519951765092e-05, + "loss": 0.2141, + "step": 11319, + "teacher_loss": 0.19908207654953003 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.4200015664100647, + "learning_rate": 2.678811557743957e-05, + "loss": 0.2762, + "step": 11320, + "teacher_loss": 0.26026713848114014 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.31462085247039795, + "learning_rate": 2.6786710932847905e-05, + "loss": 0.232, + "step": 11321, + "teacher_loss": 0.2228488326072693 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.6154340505599976, + "learning_rate": 2.6785306018022292e-05, + "loss": 0.3919, + "step": 11322, + "teacher_loss": 0.36701780557632446 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.32675957679748535, + "learning_rate": 2.6783900832994948e-05, + "loss": 0.1918, + "step": 11323, + "teacher_loss": 0.17679722607135773 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.4512156546115875, + "learning_rate": 2.6782495377798087e-05, + "loss": 0.2337, + "step": 11324, + "teacher_loss": 0.2095463126897812 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.7122188806533813, + "learning_rate": 2.6781089652463936e-05, + "loss": 0.2859, + "step": 11325, + "teacher_loss": 0.2385624200105667 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.5092610120773315, + "learning_rate": 2.6779683657024718e-05, + "loss": 0.2218, + "step": 11326, + "teacher_loss": 0.189883291721344 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.6519548296928406, + "learning_rate": 2.677827739151267e-05, + "loss": 0.2497, + "step": 11327, + "teacher_loss": 0.2050163894891739 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.7390754222869873, + "learning_rate": 2.6776870855960033e-05, + "loss": 0.3189, + "step": 11328, + "teacher_loss": 0.2721847891807556 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.17489303648471832, + "learning_rate": 2.6775464050399063e-05, + "loss": 0.1754, + "step": 11329, + "teacher_loss": 0.17546510696411133 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.5513505339622498, + "learning_rate": 2.6774056974862e-05, + "loss": 0.3001, + "step": 11330, + "teacher_loss": 0.2721668779850006 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.2625054717063904, + "learning_rate": 2.677264962938112e-05, + "loss": 0.2391, + "step": 11331, + "teacher_loss": 0.23654712736606598 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.4441652297973633, + "learning_rate": 2.677124201398867e-05, + "loss": 0.2379, + "step": 11332, + "teacher_loss": 0.21501559019088745 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.5385757088661194, + "learning_rate": 2.6769834128716938e-05, + "loss": 0.3488, + "step": 11333, + "teacher_loss": 0.3276699483394623 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.5285783410072327, + "learning_rate": 2.6768425973598194e-05, + "loss": 0.244, + "step": 11334, + "teacher_loss": 0.21239647269248962 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.264853298664093, + "learning_rate": 2.676701754866473e-05, + "loss": 0.2274, + "step": 11335, + "teacher_loss": 0.22323128581047058 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.3245318531990051, + "learning_rate": 2.676560885394883e-05, + "loss": 0.2723, + "step": 11336, + "teacher_loss": 0.2664865255355835 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.6064648628234863, + "learning_rate": 2.6764199889482797e-05, + "loss": 0.3021, + "step": 11337, + "teacher_loss": 0.26824620366096497 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.481045126914978, + "learning_rate": 2.676279065529893e-05, + "loss": 0.2274, + "step": 11338, + "teacher_loss": 0.199247807264328 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.33887118101119995, + "learning_rate": 2.676138115142954e-05, + "loss": 0.2772, + "step": 11339, + "teacher_loss": 0.27035829424858093 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.33583134412765503, + "learning_rate": 2.6759971377906943e-05, + "loss": 0.1456, + "step": 11340, + "teacher_loss": 0.12443230301141739 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.19143471121788025, + "learning_rate": 2.6758561334763455e-05, + "loss": 0.1773, + "step": 11341, + "teacher_loss": 0.17567458748817444 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.6370750069618225, + "learning_rate": 2.675715102203142e-05, + "loss": 0.4479, + "step": 11342, + "teacher_loss": 0.4269050359725952 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.3071538805961609, + "learning_rate": 2.6755740439743153e-05, + "loss": 0.2386, + "step": 11343, + "teacher_loss": 0.23096255958080292 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.4049166738986969, + "learning_rate": 2.6754329587931005e-05, + "loss": 0.1745, + "step": 11344, + "teacher_loss": 0.14885716140270233 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.6913039088249207, + "learning_rate": 2.6752918466627325e-05, + "loss": 0.2659, + "step": 11345, + "teacher_loss": 0.2186136245727539 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.7287171483039856, + "learning_rate": 2.675150707586446e-05, + "loss": 0.2327, + "step": 11346, + "teacher_loss": 0.17760223150253296 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.1814676821231842, + "learning_rate": 2.6750095415674768e-05, + "loss": 0.2724, + "step": 11347, + "teacher_loss": 0.28253573179244995 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.5242919921875, + "learning_rate": 2.6748683486090616e-05, + "loss": 0.3449, + "step": 11348, + "teacher_loss": 0.3250080943107605 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.540088951587677, + "learning_rate": 2.674727128714438e-05, + "loss": 0.5124, + "step": 11349, + "teacher_loss": 0.5093181133270264 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.3999256491661072, + "learning_rate": 2.6745858818868434e-05, + "loss": 0.2776, + "step": 11350, + "teacher_loss": 0.2640034854412079 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.5296216011047363, + "learning_rate": 2.674444608129516e-05, + "loss": 0.3481, + "step": 11351, + "teacher_loss": 0.3279725909233093 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.7622315287590027, + "learning_rate": 2.6743033074456945e-05, + "loss": 0.3889, + "step": 11352, + "teacher_loss": 0.3474624752998352 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.30449968576431274, + "learning_rate": 2.6741619798386195e-05, + "loss": 0.2967, + "step": 11353, + "teacher_loss": 0.2958603501319885 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.4423085153102875, + "learning_rate": 2.67402062531153e-05, + "loss": 0.2303, + "step": 11354, + "teacher_loss": 0.20670422911643982 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.2931573987007141, + "learning_rate": 2.673879243867668e-05, + "loss": 0.2141, + "step": 11355, + "teacher_loss": 0.20527726411819458 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.42857998609542847, + "learning_rate": 2.6737378355102743e-05, + "loss": 0.2137, + "step": 11356, + "teacher_loss": 0.18977797031402588 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 1.5221748352050781, + "learning_rate": 2.673596400242591e-05, + "loss": 0.4595, + "step": 11357, + "teacher_loss": 0.3414172828197479 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.3822225332260132, + "learning_rate": 2.6734549380678606e-05, + "loss": 0.2773, + "step": 11358, + "teacher_loss": 0.2656382918357849 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.43989866971969604, + "learning_rate": 2.6733134489893268e-05, + "loss": 0.1877, + "step": 11359, + "teacher_loss": 0.15964946150779724 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.4692853093147278, + "learning_rate": 2.673171933010234e-05, + "loss": 0.2678, + "step": 11360, + "teacher_loss": 0.24543660879135132 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.4771975874900818, + "learning_rate": 2.6730303901338254e-05, + "loss": 0.2474, + "step": 11361, + "teacher_loss": 0.22184711694717407 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.41034796833992004, + "learning_rate": 2.672888820363347e-05, + "loss": 0.2598, + "step": 11362, + "teacher_loss": 0.24309095740318298 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.2897535562515259, + "learning_rate": 2.672747223702045e-05, + "loss": 0.1623, + "step": 11363, + "teacher_loss": 0.14814062416553497 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.5135263204574585, + "learning_rate": 2.6726056001531647e-05, + "loss": 0.2283, + "step": 11364, + "teacher_loss": 0.1965990662574768 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.24716857075691223, + "learning_rate": 2.6724639497199536e-05, + "loss": 0.2304, + "step": 11365, + "teacher_loss": 0.2285584658384323 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.3011236786842346, + "learning_rate": 2.6723222724056596e-05, + "loss": 0.1785, + "step": 11366, + "teacher_loss": 0.1648693084716797 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.5832270383834839, + "learning_rate": 2.6721805682135306e-05, + "loss": 0.5169, + "step": 11367, + "teacher_loss": 0.5095845460891724 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.6850972771644592, + "learning_rate": 2.6720388371468155e-05, + "loss": 0.3605, + "step": 11368, + "teacher_loss": 0.3244236707687378 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.47433677315711975, + "learning_rate": 2.6718970792087642e-05, + "loss": 0.2044, + "step": 11369, + "teacher_loss": 0.1743718385696411 + }, + { + "compression_loss": 0.0, + "epoch": 2.05, + "label_loss": 0.80791836977005, + "learning_rate": 2.6717552944026258e-05, + "loss": 0.4262, + "step": 11370, + "teacher_loss": 0.38377946615219116 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.7666103839874268, + "learning_rate": 2.671613482731652e-05, + "loss": 0.8251, + "step": 11371, + "teacher_loss": 0.8316085934638977 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.5949872732162476, + "learning_rate": 2.6714716441990937e-05, + "loss": 0.2502, + "step": 11372, + "teacher_loss": 0.21190816164016724 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.3114638924598694, + "learning_rate": 2.6713297788082025e-05, + "loss": 0.2185, + "step": 11373, + "teacher_loss": 0.2082090973854065 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.31177061796188354, + "learning_rate": 2.671187886562232e-05, + "loss": 0.3025, + "step": 11374, + "teacher_loss": 0.30150407552719116 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.4346925616264343, + "learning_rate": 2.671045967464434e-05, + "loss": 0.3049, + "step": 11375, + "teacher_loss": 0.2905040383338928 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.4796760678291321, + "learning_rate": 2.6709040215180633e-05, + "loss": 0.2361, + "step": 11376, + "teacher_loss": 0.20907297730445862 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.4441388249397278, + "learning_rate": 2.670762048726374e-05, + "loss": 0.4339, + "step": 11377, + "teacher_loss": 0.4327280819416046 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 1.0570833683013916, + "learning_rate": 2.6706200490926204e-05, + "loss": 0.7004, + "step": 11378, + "teacher_loss": 0.6607751846313477 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.3764675557613373, + "learning_rate": 2.6704780226200593e-05, + "loss": 0.2676, + "step": 11379, + "teacher_loss": 0.25549939274787903 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.4097643494606018, + "learning_rate": 2.6703359693119468e-05, + "loss": 0.2764, + "step": 11380, + "teacher_loss": 0.26159554719924927 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.17159897089004517, + "learning_rate": 2.6701938891715385e-05, + "loss": 0.2116, + "step": 11381, + "teacher_loss": 0.2160591036081314 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.8258722424507141, + "learning_rate": 2.6700517822020934e-05, + "loss": 0.3238, + "step": 11382, + "teacher_loss": 0.2680235207080841 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.3728242516517639, + "learning_rate": 2.6699096484068686e-05, + "loss": 0.2009, + "step": 11383, + "teacher_loss": 0.18182902038097382 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.8285374641418457, + "learning_rate": 2.6697674877891234e-05, + "loss": 0.4015, + "step": 11384, + "teacher_loss": 0.3540651798248291 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.6131064891815186, + "learning_rate": 2.6696253003521165e-05, + "loss": 0.2731, + "step": 11385, + "teacher_loss": 0.23526602983474731 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.11068959534168243, + "learning_rate": 2.6694830860991087e-05, + "loss": 0.2356, + "step": 11386, + "teacher_loss": 0.24946291744709015 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.6774469017982483, + "learning_rate": 2.6693408450333594e-05, + "loss": 0.2876, + "step": 11387, + "teacher_loss": 0.2442491352558136 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.6583279371261597, + "learning_rate": 2.669198577158131e-05, + "loss": 0.2307, + "step": 11388, + "teacher_loss": 0.1831304132938385 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.2326776087284088, + "learning_rate": 2.669056282476684e-05, + "loss": 0.1644, + "step": 11389, + "teacher_loss": 0.1568393111228943 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.40882301330566406, + "learning_rate": 2.668913960992282e-05, + "loss": 0.3717, + "step": 11390, + "teacher_loss": 0.3675907850265503 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.16709105670452118, + "learning_rate": 2.6687716127081873e-05, + "loss": 0.2232, + "step": 11391, + "teacher_loss": 0.22937899827957153 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.6514356136322021, + "learning_rate": 2.6686292376276637e-05, + "loss": 0.2949, + "step": 11392, + "teacher_loss": 0.2552560269832611 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 1.2707115411758423, + "learning_rate": 2.6684868357539754e-05, + "loss": 0.6124, + "step": 11393, + "teacher_loss": 0.5392736792564392 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.5533962249755859, + "learning_rate": 2.6683444070903867e-05, + "loss": 0.3121, + "step": 11394, + "teacher_loss": 0.2852592468261719 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.2995579242706299, + "learning_rate": 2.6682019516401644e-05, + "loss": 0.2554, + "step": 11395, + "teacher_loss": 0.250477135181427 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.4474751949310303, + "learning_rate": 2.668059469406574e-05, + "loss": 0.2759, + "step": 11396, + "teacher_loss": 0.2568162977695465 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.4892094135284424, + "learning_rate": 2.667916960392881e-05, + "loss": 0.4394, + "step": 11397, + "teacher_loss": 0.4338518977165222 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.416218101978302, + "learning_rate": 2.6677744246023543e-05, + "loss": 0.2643, + "step": 11398, + "teacher_loss": 0.24744123220443726 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.5471934080123901, + "learning_rate": 2.667631862038261e-05, + "loss": 0.2875, + "step": 11399, + "teacher_loss": 0.2586797773838043 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.23702137172222137, + "learning_rate": 2.6674892727038705e-05, + "loss": 0.1887, + "step": 11400, + "teacher_loss": 0.1832950860261917 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.5204581022262573, + "learning_rate": 2.6673466566024507e-05, + "loss": 0.2367, + "step": 11401, + "teacher_loss": 0.20518356561660767 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 1.1431727409362793, + "learning_rate": 2.667204013737272e-05, + "loss": 0.5509, + "step": 11402, + "teacher_loss": 0.48510515689849854 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.48504188656806946, + "learning_rate": 2.6670613441116044e-05, + "loss": 0.2753, + "step": 11403, + "teacher_loss": 0.25195983052253723 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.29413798451423645, + "learning_rate": 2.66691864772872e-05, + "loss": 0.2595, + "step": 11404, + "teacher_loss": 0.2556458115577698 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.18042083084583282, + "learning_rate": 2.666775924591889e-05, + "loss": 0.2159, + "step": 11405, + "teacher_loss": 0.2198476791381836 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.753820538520813, + "learning_rate": 2.6666331747043842e-05, + "loss": 0.3158, + "step": 11406, + "teacher_loss": 0.26708540320396423 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.4868769645690918, + "learning_rate": 2.6664903980694788e-05, + "loss": 0.4509, + "step": 11407, + "teacher_loss": 0.4469580352306366 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.4388984143733978, + "learning_rate": 2.6663475946904455e-05, + "loss": 0.3446, + "step": 11408, + "teacher_loss": 0.334101140499115 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.5309861898422241, + "learning_rate": 2.6662047645705594e-05, + "loss": 0.2445, + "step": 11409, + "teacher_loss": 0.21266715228557587 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.3994253873825073, + "learning_rate": 2.666061907713094e-05, + "loss": 0.1733, + "step": 11410, + "teacher_loss": 0.14822141826152802 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.5392282009124756, + "learning_rate": 2.665919024121325e-05, + "loss": 0.2879, + "step": 11411, + "teacher_loss": 0.2599894106388092 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.40017589926719666, + "learning_rate": 2.665776113798529e-05, + "loss": 0.2138, + "step": 11412, + "teacher_loss": 0.19310961663722992 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.2381407469511032, + "learning_rate": 2.6656331767479812e-05, + "loss": 0.2444, + "step": 11413, + "teacher_loss": 0.24511444568634033 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.506705641746521, + "learning_rate": 2.6654902129729598e-05, + "loss": 0.244, + "step": 11414, + "teacher_loss": 0.21480131149291992 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.20453046262264252, + "learning_rate": 2.6653472224767418e-05, + "loss": 0.2079, + "step": 11415, + "teacher_loss": 0.20824390649795532 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.4179179072380066, + "learning_rate": 2.6652042052626065e-05, + "loss": 0.1925, + "step": 11416, + "teacher_loss": 0.16740979254245758 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.4616493880748749, + "learning_rate": 2.6650611613338314e-05, + "loss": 0.2718, + "step": 11417, + "teacher_loss": 0.2506926655769348 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.715360164642334, + "learning_rate": 2.6649180906936975e-05, + "loss": 0.3145, + "step": 11418, + "teacher_loss": 0.27000635862350464 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.25587835907936096, + "learning_rate": 2.664774993345484e-05, + "loss": 0.2004, + "step": 11419, + "teacher_loss": 0.19425319135189056 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.34409594535827637, + "learning_rate": 2.6646318692924727e-05, + "loss": 0.2402, + "step": 11420, + "teacher_loss": 0.22863470017910004 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.21863621473312378, + "learning_rate": 2.664488718537944e-05, + "loss": 0.214, + "step": 11421, + "teacher_loss": 0.21350552141666412 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.6358727216720581, + "learning_rate": 2.66434554108518e-05, + "loss": 0.3149, + "step": 11422, + "teacher_loss": 0.27927178144454956 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.7564515471458435, + "learning_rate": 2.664202336937464e-05, + "loss": 0.5201, + "step": 11423, + "teacher_loss": 0.4938763976097107 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.5134033560752869, + "learning_rate": 2.664059106098079e-05, + "loss": 0.2817, + "step": 11424, + "teacher_loss": 0.2559245228767395 + }, + { + "compression_loss": 0.0, + "epoch": 2.06, + "label_loss": 0.9037539958953857, + "learning_rate": 2.6639158485703087e-05, + "loss": 0.3353, + "step": 11425, + "teacher_loss": 0.2721256911754608 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.7813578844070435, + "learning_rate": 2.6637725643574372e-05, + "loss": 0.3245, + "step": 11426, + "teacher_loss": 0.27375704050064087 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.5056071877479553, + "learning_rate": 2.66362925346275e-05, + "loss": 0.2533, + "step": 11427, + "teacher_loss": 0.22531168162822723 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.6999883055686951, + "learning_rate": 2.663485915889533e-05, + "loss": 0.3655, + "step": 11428, + "teacher_loss": 0.32831957936286926 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.9246419072151184, + "learning_rate": 2.6633425516410723e-05, + "loss": 0.3743, + "step": 11429, + "teacher_loss": 0.3130955100059509 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.2807331383228302, + "learning_rate": 2.6631991607206546e-05, + "loss": 0.2642, + "step": 11430, + "teacher_loss": 0.2623119354248047 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.34489190578460693, + "learning_rate": 2.663055743131568e-05, + "loss": 0.2422, + "step": 11431, + "teacher_loss": 0.23082560300827026 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.807802677154541, + "learning_rate": 2.6629122988770994e-05, + "loss": 0.3389, + "step": 11432, + "teacher_loss": 0.2867452800273895 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.7090919613838196, + "learning_rate": 2.662768827960539e-05, + "loss": 0.344, + "step": 11433, + "teacher_loss": 0.30344754457473755 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.5698674917221069, + "learning_rate": 2.6626253303851753e-05, + "loss": 0.3525, + "step": 11434, + "teacher_loss": 0.328380823135376 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.7092264294624329, + "learning_rate": 2.6624818061542987e-05, + "loss": 0.5226, + "step": 11435, + "teacher_loss": 0.5018178224563599 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.4048399329185486, + "learning_rate": 2.6623382552711994e-05, + "loss": 0.331, + "step": 11436, + "teacher_loss": 0.322791188955307 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.24470043182373047, + "learning_rate": 2.6621946777391693e-05, + "loss": 0.2157, + "step": 11437, + "teacher_loss": 0.21246306598186493 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.3553011417388916, + "learning_rate": 2.662051073561499e-05, + "loss": 0.2741, + "step": 11438, + "teacher_loss": 0.26510071754455566 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.21672523021697998, + "learning_rate": 2.6619074427414817e-05, + "loss": 0.2586, + "step": 11439, + "teacher_loss": 0.26324760913848877 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.5286556482315063, + "learning_rate": 2.6617637852824107e-05, + "loss": 0.3539, + "step": 11440, + "teacher_loss": 0.33448976278305054 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.3425612449645996, + "learning_rate": 2.6616201011875792e-05, + "loss": 0.2759, + "step": 11441, + "teacher_loss": 0.2684401869773865 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.34516701102256775, + "learning_rate": 2.6614763904602812e-05, + "loss": 0.2883, + "step": 11442, + "teacher_loss": 0.2819909453392029 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.469087153673172, + "learning_rate": 2.661332653103812e-05, + "loss": 0.301, + "step": 11443, + "teacher_loss": 0.2823309600353241 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.15045467019081116, + "learning_rate": 2.661188889121467e-05, + "loss": 0.1932, + "step": 11444, + "teacher_loss": 0.19799047708511353 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.32715171575546265, + "learning_rate": 2.661045098516542e-05, + "loss": 0.2446, + "step": 11445, + "teacher_loss": 0.23545153439044952 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.4435218274593353, + "learning_rate": 2.660901281292334e-05, + "loss": 0.2106, + "step": 11446, + "teacher_loss": 0.18474188446998596 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.5281405448913574, + "learning_rate": 2.66075743745214e-05, + "loss": 0.2609, + "step": 11447, + "teacher_loss": 0.23123279213905334 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.3339434266090393, + "learning_rate": 2.6606135669992583e-05, + "loss": 0.2472, + "step": 11448, + "teacher_loss": 0.23753131926059723 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.3242594003677368, + "learning_rate": 2.6604696699369872e-05, + "loss": 0.2995, + "step": 11449, + "teacher_loss": 0.29677414894104004 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.5262978076934814, + "learning_rate": 2.6603257462686258e-05, + "loss": 0.2556, + "step": 11450, + "teacher_loss": 0.22556047141551971 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.22574535012245178, + "learning_rate": 2.660181795997474e-05, + "loss": 0.1648, + "step": 11451, + "teacher_loss": 0.15807875990867615 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.46097689867019653, + "learning_rate": 2.6600378191268317e-05, + "loss": 0.3554, + "step": 11452, + "teacher_loss": 0.34369906783103943 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.4093618392944336, + "learning_rate": 2.6598938156600005e-05, + "loss": 0.2279, + "step": 11453, + "teacher_loss": 0.2077624648809433 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.8208404779434204, + "learning_rate": 2.6597497856002815e-05, + "loss": 0.5395, + "step": 11454, + "teacher_loss": 0.5082308053970337 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.4899601340293884, + "learning_rate": 2.6596057289509773e-05, + "loss": 0.2511, + "step": 11455, + "teacher_loss": 0.22460442781448364 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.597091794013977, + "learning_rate": 2.65946164571539e-05, + "loss": 0.3034, + "step": 11456, + "teacher_loss": 0.2707829475402832 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.8050843477249146, + "learning_rate": 2.6593175358968236e-05, + "loss": 0.2514, + "step": 11457, + "teacher_loss": 0.18992206454277039 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.31587517261505127, + "learning_rate": 2.659173399498582e-05, + "loss": 0.18, + "step": 11458, + "teacher_loss": 0.16488569974899292 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.2840992510318756, + "learning_rate": 2.6590292365239695e-05, + "loss": 0.2673, + "step": 11459, + "teacher_loss": 0.26540112495422363 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 1.3859256505966187, + "learning_rate": 2.6588850469762916e-05, + "loss": 0.3554, + "step": 11460, + "teacher_loss": 0.24092447757720947 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.426033079624176, + "learning_rate": 2.6587408308588544e-05, + "loss": 0.2885, + "step": 11461, + "teacher_loss": 0.2732661962509155 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.3164922595024109, + "learning_rate": 2.658596588174964e-05, + "loss": 0.1829, + "step": 11462, + "teacher_loss": 0.16805408895015717 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.2843630909919739, + "learning_rate": 2.6584523189279272e-05, + "loss": 0.2109, + "step": 11463, + "teacher_loss": 0.20278067886829376 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.3334880471229553, + "learning_rate": 2.658308023121052e-05, + "loss": 0.249, + "step": 11464, + "teacher_loss": 0.23963509500026703 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.16369962692260742, + "learning_rate": 2.6581637007576463e-05, + "loss": 0.2105, + "step": 11465, + "teacher_loss": 0.21565020084381104 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.35381096601486206, + "learning_rate": 2.65801935184102e-05, + "loss": 0.2575, + "step": 11466, + "teacher_loss": 0.24675695598125458 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.49458664655685425, + "learning_rate": 2.6578749763744815e-05, + "loss": 0.2436, + "step": 11467, + "teacher_loss": 0.2157311737537384 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.7266108989715576, + "learning_rate": 2.657730574361341e-05, + "loss": 0.2407, + "step": 11468, + "teacher_loss": 0.18672975897789001 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.1076120063662529, + "learning_rate": 2.65758614580491e-05, + "loss": 0.1619, + "step": 11469, + "teacher_loss": 0.16797390580177307 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.45156431198120117, + "learning_rate": 2.6574416907084993e-05, + "loss": 0.2822, + "step": 11470, + "teacher_loss": 0.2633805274963379 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.769243061542511, + "learning_rate": 2.6572972090754205e-05, + "loss": 0.3033, + "step": 11471, + "teacher_loss": 0.25148072838783264 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.39401692152023315, + "learning_rate": 2.6571527009089868e-05, + "loss": 0.3032, + "step": 11472, + "teacher_loss": 0.2931518852710724 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.3975682258605957, + "learning_rate": 2.657008166212511e-05, + "loss": 0.2405, + "step": 11473, + "teacher_loss": 0.2231028825044632 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.2454376369714737, + "learning_rate": 2.656863604989306e-05, + "loss": 0.2289, + "step": 11474, + "teacher_loss": 0.22706955671310425 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.39082401990890503, + "learning_rate": 2.656719017242688e-05, + "loss": 0.2255, + "step": 11475, + "teacher_loss": 0.207082137465477 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.36211955547332764, + "learning_rate": 2.6565744029759702e-05, + "loss": 0.3358, + "step": 11476, + "teacher_loss": 0.33288872241973877 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.31308960914611816, + "learning_rate": 2.6564297621924696e-05, + "loss": 0.3274, + "step": 11477, + "teacher_loss": 0.3289948105812073 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 1.1792408227920532, + "learning_rate": 2.656285094895501e-05, + "loss": 0.4596, + "step": 11478, + "teacher_loss": 0.3796197175979614 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.513270378112793, + "learning_rate": 2.656140401088383e-05, + "loss": 0.2464, + "step": 11479, + "teacher_loss": 0.21678464114665985 + }, + { + "compression_loss": 0.0, + "epoch": 2.07, + "label_loss": 0.3803479075431824, + "learning_rate": 2.655995680774431e-05, + "loss": 0.2962, + "step": 11480, + "teacher_loss": 0.28682851791381836 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.3974624276161194, + "learning_rate": 2.6558509339569638e-05, + "loss": 0.1999, + "step": 11481, + "teacher_loss": 0.1779431849718094 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.11776053160429001, + "learning_rate": 2.6557061606393008e-05, + "loss": 0.202, + "step": 11482, + "teacher_loss": 0.21130812168121338 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.218589186668396, + "learning_rate": 2.65556136082476e-05, + "loss": 0.171, + "step": 11483, + "teacher_loss": 0.1656721830368042 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.5219054222106934, + "learning_rate": 2.655416534516662e-05, + "loss": 0.2548, + "step": 11484, + "teacher_loss": 0.22517472505569458 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.778640627861023, + "learning_rate": 2.6552716817183263e-05, + "loss": 0.3712, + "step": 11485, + "teacher_loss": 0.3259029984474182 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.3549985885620117, + "learning_rate": 2.6551268024330754e-05, + "loss": 0.224, + "step": 11486, + "teacher_loss": 0.20948973298072815 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.5217106938362122, + "learning_rate": 2.6549818966642297e-05, + "loss": 0.3921, + "step": 11487, + "teacher_loss": 0.37764373421669006 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.27615150809288025, + "learning_rate": 2.654836964415112e-05, + "loss": 0.2988, + "step": 11488, + "teacher_loss": 0.3013116121292114 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.7139608263969421, + "learning_rate": 2.6546920056890456e-05, + "loss": 0.2958, + "step": 11489, + "teacher_loss": 0.2493637204170227 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.12833325564861298, + "learning_rate": 2.654547020489353e-05, + "loss": 0.2583, + "step": 11490, + "teacher_loss": 0.27271461486816406 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.3258852958679199, + "learning_rate": 2.654402008819359e-05, + "loss": 0.3044, + "step": 11491, + "teacher_loss": 0.3019777536392212 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.3171885907649994, + "learning_rate": 2.654256970682388e-05, + "loss": 0.234, + "step": 11492, + "teacher_loss": 0.22480100393295288 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.48541080951690674, + "learning_rate": 2.654111906081765e-05, + "loss": 0.2637, + "step": 11493, + "teacher_loss": 0.2390764355659485 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.47897306084632874, + "learning_rate": 2.6539668150208163e-05, + "loss": 0.192, + "step": 11494, + "teacher_loss": 0.16007855534553528 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.7920082211494446, + "learning_rate": 2.6538216975028685e-05, + "loss": 0.3755, + "step": 11495, + "teacher_loss": 0.3292248845100403 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.48282814025878906, + "learning_rate": 2.6536765535312484e-05, + "loss": 0.334, + "step": 11496, + "teacher_loss": 0.31745779514312744 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.5188870429992676, + "learning_rate": 2.6535313831092836e-05, + "loss": 0.2768, + "step": 11497, + "teacher_loss": 0.24995186924934387 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.31040865182876587, + "learning_rate": 2.6533861862403028e-05, + "loss": 0.2305, + "step": 11498, + "teacher_loss": 0.22160229086875916 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.5093654990196228, + "learning_rate": 2.653240962927635e-05, + "loss": 0.2656, + "step": 11499, + "teacher_loss": 0.23854956030845642 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.30939435958862305, + "learning_rate": 2.6530957131746095e-05, + "loss": 0.3213, + "step": 11500, + "teacher_loss": 0.32258930802345276 + }, + { + "epoch": 2.08, + "eval_exact_match": 79.49858088930937, + "eval_f1": 86.98967637166847, + "step": 11500 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.9235019683837891, + "learning_rate": 2.652950436984556e-05, + "loss": 0.3569, + "step": 11501, + "teacher_loss": 0.2939265966415405 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.21222499012947083, + "learning_rate": 2.6528051343608063e-05, + "loss": 0.1878, + "step": 11502, + "teacher_loss": 0.18513526022434235 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.8145816326141357, + "learning_rate": 2.6526598053066904e-05, + "loss": 0.4081, + "step": 11503, + "teacher_loss": 0.36292803287506104 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.5495126247406006, + "learning_rate": 2.6525144498255417e-05, + "loss": 0.2945, + "step": 11504, + "teacher_loss": 0.266146719455719 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.4471229016780853, + "learning_rate": 2.6523690679206922e-05, + "loss": 0.2837, + "step": 11505, + "teacher_loss": 0.2655555009841919 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.3105457127094269, + "learning_rate": 2.6522236595954747e-05, + "loss": 0.2031, + "step": 11506, + "teacher_loss": 0.19117000699043274 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.3489554226398468, + "learning_rate": 2.6520782248532226e-05, + "loss": 0.1961, + "step": 11507, + "teacher_loss": 0.17906615138053894 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.23212981224060059, + "learning_rate": 2.6519327636972716e-05, + "loss": 0.2283, + "step": 11508, + "teacher_loss": 0.22782379388809204 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.8445788621902466, + "learning_rate": 2.651787276130956e-05, + "loss": 0.3827, + "step": 11509, + "teacher_loss": 0.3313617706298828 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.31710678339004517, + "learning_rate": 2.6516417621576113e-05, + "loss": 0.2812, + "step": 11510, + "teacher_loss": 0.27719205617904663 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.7960541844367981, + "learning_rate": 2.651496221780574e-05, + "loss": 0.6575, + "step": 11511, + "teacher_loss": 0.6421504020690918 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.4644680321216583, + "learning_rate": 2.65135065500318e-05, + "loss": 0.1865, + "step": 11512, + "teacher_loss": 0.15564100444316864 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.7393025159835815, + "learning_rate": 2.651205061828768e-05, + "loss": 0.378, + "step": 11513, + "teacher_loss": 0.33785346150398254 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.267619788646698, + "learning_rate": 2.6510594422606757e-05, + "loss": 0.2544, + "step": 11514, + "teacher_loss": 0.2529555559158325 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.5073104500770569, + "learning_rate": 2.6509137963022408e-05, + "loss": 0.22, + "step": 11515, + "teacher_loss": 0.18805575370788574 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.327262282371521, + "learning_rate": 2.6507681239568034e-05, + "loss": 0.2168, + "step": 11516, + "teacher_loss": 0.2044813632965088 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.4161885976791382, + "learning_rate": 2.650622425227703e-05, + "loss": 0.3237, + "step": 11517, + "teacher_loss": 0.3133743405342102 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.2961266040802002, + "learning_rate": 2.6504767001182807e-05, + "loss": 0.2798, + "step": 11518, + "teacher_loss": 0.2780037820339203 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.27459368109703064, + "learning_rate": 2.650330948631876e-05, + "loss": 0.211, + "step": 11519, + "teacher_loss": 0.20397761464118958 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.44292938709259033, + "learning_rate": 2.6501851707718322e-05, + "loss": 0.206, + "step": 11520, + "teacher_loss": 0.1796289086341858 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.5749096870422363, + "learning_rate": 2.6500393665414906e-05, + "loss": 0.7059, + "step": 11521, + "teacher_loss": 0.7204955816268921 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.4794371426105499, + "learning_rate": 2.649893535944195e-05, + "loss": 0.3728, + "step": 11522, + "teacher_loss": 0.3610028028488159 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.1794937402009964, + "learning_rate": 2.6497476789832873e-05, + "loss": 0.2259, + "step": 11523, + "teacher_loss": 0.23105546832084656 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.7142660021781921, + "learning_rate": 2.6496017956621126e-05, + "loss": 0.5794, + "step": 11524, + "teacher_loss": 0.5644403696060181 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.4158456325531006, + "learning_rate": 2.6494558859840157e-05, + "loss": 0.2554, + "step": 11525, + "teacher_loss": 0.23762312531471252 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.4289650619029999, + "learning_rate": 2.6493099499523416e-05, + "loss": 0.2138, + "step": 11526, + "teacher_loss": 0.18991005420684814 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.5315886735916138, + "learning_rate": 2.6491639875704358e-05, + "loss": 0.2412, + "step": 11527, + "teacher_loss": 0.20890063047409058 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.9457077980041504, + "learning_rate": 2.6490179988416453e-05, + "loss": 0.2782, + "step": 11528, + "teacher_loss": 0.2040167599916458 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.31757789850234985, + "learning_rate": 2.648871983769317e-05, + "loss": 0.1751, + "step": 11529, + "teacher_loss": 0.1592797189950943 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.4163209795951843, + "learning_rate": 2.6487259423567988e-05, + "loss": 0.2582, + "step": 11530, + "teacher_loss": 0.2406538426876068 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.3915764093399048, + "learning_rate": 2.6485798746074383e-05, + "loss": 0.4032, + "step": 11531, + "teacher_loss": 0.4044739603996277 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 1.0775914192199707, + "learning_rate": 2.648433780524586e-05, + "loss": 0.2503, + "step": 11532, + "teacher_loss": 0.15838435292243958 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.4765702486038208, + "learning_rate": 2.6482876601115892e-05, + "loss": 0.3266, + "step": 11533, + "teacher_loss": 0.3099861741065979 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.4064400792121887, + "learning_rate": 2.6481415133717996e-05, + "loss": 0.2799, + "step": 11534, + "teacher_loss": 0.2658911943435669 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.8612875938415527, + "learning_rate": 2.6479953403085668e-05, + "loss": 0.4432, + "step": 11535, + "teacher_loss": 0.3967375159263611 + }, + { + "compression_loss": 0.0, + "epoch": 2.08, + "label_loss": 0.2469785362482071, + "learning_rate": 2.647849140925243e-05, + "loss": 0.2348, + "step": 11536, + "teacher_loss": 0.23347502946853638 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.48398667573928833, + "learning_rate": 2.6477029152251804e-05, + "loss": 0.2074, + "step": 11537, + "teacher_loss": 0.1766219586133957 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.3509359657764435, + "learning_rate": 2.6475566632117305e-05, + "loss": 0.2066, + "step": 11538, + "teacher_loss": 0.19056370854377747 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.21700313687324524, + "learning_rate": 2.647410384888247e-05, + "loss": 0.1886, + "step": 11539, + "teacher_loss": 0.1853916347026825 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.4358169138431549, + "learning_rate": 2.6472640802580835e-05, + "loss": 0.3463, + "step": 11540, + "teacher_loss": 0.33630073070526123 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.3147571086883545, + "learning_rate": 2.6471177493245942e-05, + "loss": 0.2904, + "step": 11541, + "teacher_loss": 0.2876918911933899 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.3784254789352417, + "learning_rate": 2.646971392091134e-05, + "loss": 0.216, + "step": 11542, + "teacher_loss": 0.19793689250946045 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.4209362268447876, + "learning_rate": 2.646825008561059e-05, + "loss": 0.3424, + "step": 11543, + "teacher_loss": 0.3336646258831024 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.7893935441970825, + "learning_rate": 2.6466785987377248e-05, + "loss": 0.5876, + "step": 11544, + "teacher_loss": 0.5651889443397522 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.3821299076080322, + "learning_rate": 2.646532162624488e-05, + "loss": 0.2026, + "step": 11545, + "teacher_loss": 0.18260201811790466 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.3684558868408203, + "learning_rate": 2.646385700224706e-05, + "loss": 0.1951, + "step": 11546, + "teacher_loss": 0.1758539080619812 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.2763667702674866, + "learning_rate": 2.6462392115417374e-05, + "loss": 0.2194, + "step": 11547, + "teacher_loss": 0.21307410299777985 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.23371244966983795, + "learning_rate": 2.6460926965789403e-05, + "loss": 0.2388, + "step": 11548, + "teacher_loss": 0.23935914039611816 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.397588849067688, + "learning_rate": 2.6459461553396734e-05, + "loss": 0.2771, + "step": 11549, + "teacher_loss": 0.26375043392181396 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 1.0016975402832031, + "learning_rate": 2.6457995878272972e-05, + "loss": 0.3498, + "step": 11550, + "teacher_loss": 0.27739956974983215 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.622583270072937, + "learning_rate": 2.645652994045172e-05, + "loss": 0.4905, + "step": 11551, + "teacher_loss": 0.4758085012435913 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.45356571674346924, + "learning_rate": 2.645506373996658e-05, + "loss": 0.2727, + "step": 11552, + "teacher_loss": 0.2525651454925537 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.8043028116226196, + "learning_rate": 2.6453597276851173e-05, + "loss": 0.4029, + "step": 11553, + "teacher_loss": 0.3582611680030823 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.6703791618347168, + "learning_rate": 2.645213055113912e-05, + "loss": 0.3104, + "step": 11554, + "teacher_loss": 0.2704000473022461 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.3875727653503418, + "learning_rate": 2.6450663562864052e-05, + "loss": 0.293, + "step": 11555, + "teacher_loss": 0.28245264291763306 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.7905151844024658, + "learning_rate": 2.6449196312059596e-05, + "loss": 0.4572, + "step": 11556, + "teacher_loss": 0.42016997933387756 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.6901015043258667, + "learning_rate": 2.64477287987594e-05, + "loss": 0.3237, + "step": 11557, + "teacher_loss": 0.282986044883728 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.241962730884552, + "learning_rate": 2.6446261022997098e-05, + "loss": 0.1986, + "step": 11558, + "teacher_loss": 0.19382070004940033 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.1880953013896942, + "learning_rate": 2.6444792984806352e-05, + "loss": 0.1853, + "step": 11559, + "teacher_loss": 0.1849527508020401 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.5639216899871826, + "learning_rate": 2.6443324684220815e-05, + "loss": 0.3039, + "step": 11560, + "teacher_loss": 0.2750260829925537 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.26886558532714844, + "learning_rate": 2.6441856121274154e-05, + "loss": 0.2468, + "step": 11561, + "teacher_loss": 0.24436140060424805 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.7171804308891296, + "learning_rate": 2.6440387296000037e-05, + "loss": 0.2556, + "step": 11562, + "teacher_loss": 0.20427894592285156 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.4552052915096283, + "learning_rate": 2.6438918208432136e-05, + "loss": 0.3841, + "step": 11563, + "teacher_loss": 0.37614595890045166 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.2889822721481323, + "learning_rate": 2.643744885860414e-05, + "loss": 0.2251, + "step": 11564, + "teacher_loss": 0.21797339618206024 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.3351632058620453, + "learning_rate": 2.6435979246549727e-05, + "loss": 0.1853, + "step": 11565, + "teacher_loss": 0.16866150498390198 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.4389292597770691, + "learning_rate": 2.6434509372302602e-05, + "loss": 0.2828, + "step": 11566, + "teacher_loss": 0.26550358533859253 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.8094568252563477, + "learning_rate": 2.643303923589646e-05, + "loss": 0.6219, + "step": 11567, + "teacher_loss": 0.6010293960571289 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.3604455292224884, + "learning_rate": 2.6431568837365e-05, + "loss": 0.3207, + "step": 11568, + "teacher_loss": 0.3162683844566345 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.4267175495624542, + "learning_rate": 2.6430098176741943e-05, + "loss": 0.3261, + "step": 11569, + "teacher_loss": 0.3148837685585022 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.4980096220970154, + "learning_rate": 2.6428627254061007e-05, + "loss": 0.4433, + "step": 11570, + "teacher_loss": 0.43725964426994324 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.46014195680618286, + "learning_rate": 2.6427156069355915e-05, + "loss": 0.3016, + "step": 11571, + "teacher_loss": 0.2839767336845398 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.5203524231910706, + "learning_rate": 2.6425684622660387e-05, + "loss": 0.2015, + "step": 11572, + "teacher_loss": 0.16604149341583252 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.20247912406921387, + "learning_rate": 2.642421291400817e-05, + "loss": 0.3619, + "step": 11573, + "teacher_loss": 0.37958797812461853 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.9878513216972351, + "learning_rate": 2.6422740943433004e-05, + "loss": 0.4417, + "step": 11574, + "teacher_loss": 0.3810656666755676 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.5010071396827698, + "learning_rate": 2.6421268710968634e-05, + "loss": 0.2622, + "step": 11575, + "teacher_loss": 0.235645592212677 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.2708182632923126, + "learning_rate": 2.6419796216648815e-05, + "loss": 0.3177, + "step": 11576, + "teacher_loss": 0.32288557291030884 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.4682545065879822, + "learning_rate": 2.6418323460507307e-05, + "loss": 0.3218, + "step": 11577, + "teacher_loss": 0.30554401874542236 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.606867253780365, + "learning_rate": 2.641685044257788e-05, + "loss": 0.399, + "step": 11578, + "teacher_loss": 0.37594592571258545 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.3676658868789673, + "learning_rate": 2.64153771628943e-05, + "loss": 0.1977, + "step": 11579, + "teacher_loss": 0.17878666520118713 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.3219849169254303, + "learning_rate": 2.6413903621490343e-05, + "loss": 0.2121, + "step": 11580, + "teacher_loss": 0.19991880655288696 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.5104255676269531, + "learning_rate": 2.64124298183998e-05, + "loss": 0.2762, + "step": 11581, + "teacher_loss": 0.2501417398452759 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.4044257402420044, + "learning_rate": 2.6410955753656454e-05, + "loss": 0.2095, + "step": 11582, + "teacher_loss": 0.18786993622779846 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.3525312840938568, + "learning_rate": 2.6409481427294105e-05, + "loss": 0.2127, + "step": 11583, + "teacher_loss": 0.1971331238746643 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.6474061012268066, + "learning_rate": 2.640800683934656e-05, + "loss": 0.5159, + "step": 11584, + "teacher_loss": 0.5012890100479126 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.7495552897453308, + "learning_rate": 2.6406531989847615e-05, + "loss": 0.303, + "step": 11585, + "teacher_loss": 0.2534202039241791 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.1992206871509552, + "learning_rate": 2.640505687883109e-05, + "loss": 0.234, + "step": 11586, + "teacher_loss": 0.23788242042064667 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.3321964144706726, + "learning_rate": 2.6403581506330807e-05, + "loss": 0.2551, + "step": 11587, + "teacher_loss": 0.24657011032104492 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.580051064491272, + "learning_rate": 2.6402105872380594e-05, + "loss": 0.5082, + "step": 11588, + "teacher_loss": 0.5001651048660278 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.4092367887496948, + "learning_rate": 2.640062997701427e-05, + "loss": 0.228, + "step": 11589, + "teacher_loss": 0.20781967043876648 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.7279612421989441, + "learning_rate": 2.6399153820265687e-05, + "loss": 0.3014, + "step": 11590, + "teacher_loss": 0.25404855608940125 + }, + { + "compression_loss": 0.0, + "epoch": 2.09, + "label_loss": 0.3218912184238434, + "learning_rate": 2.6397677402168682e-05, + "loss": 0.2654, + "step": 11591, + "teacher_loss": 0.2590905427932739 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.36744678020477295, + "learning_rate": 2.6396200722757107e-05, + "loss": 0.3881, + "step": 11592, + "teacher_loss": 0.39037349820137024 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.4213526248931885, + "learning_rate": 2.639472378206482e-05, + "loss": 0.1865, + "step": 11593, + "teacher_loss": 0.16042251884937286 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.5628147125244141, + "learning_rate": 2.6393246580125672e-05, + "loss": 0.4136, + "step": 11594, + "teacher_loss": 0.39704567193984985 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.11632384359836578, + "learning_rate": 2.6391769116973545e-05, + "loss": 0.1418, + "step": 11595, + "teacher_loss": 0.14466771483421326 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.45982855558395386, + "learning_rate": 2.6390291392642305e-05, + "loss": 0.3066, + "step": 11596, + "teacher_loss": 0.28962796926498413 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.48725011944770813, + "learning_rate": 2.638881340716583e-05, + "loss": 0.2133, + "step": 11597, + "teacher_loss": 0.18290892243385315 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.5628359317779541, + "learning_rate": 2.6387335160578012e-05, + "loss": 0.3256, + "step": 11598, + "teacher_loss": 0.2992381453514099 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.6120871901512146, + "learning_rate": 2.638585665291274e-05, + "loss": 0.2968, + "step": 11599, + "teacher_loss": 0.2617568373680115 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.31551745533943176, + "learning_rate": 2.6384377884203912e-05, + "loss": 0.2168, + "step": 11600, + "teacher_loss": 0.20583681762218475 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.5428146123886108, + "learning_rate": 2.638289885448543e-05, + "loss": 0.3146, + "step": 11601, + "teacher_loss": 0.2892981171607971 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.27808085083961487, + "learning_rate": 2.63814195637912e-05, + "loss": 0.2667, + "step": 11602, + "teacher_loss": 0.2654552161693573 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.2951381206512451, + "learning_rate": 2.637994001215515e-05, + "loss": 0.2869, + "step": 11603, + "teacher_loss": 0.2860385477542877 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 1.4654958248138428, + "learning_rate": 2.637846019961119e-05, + "loss": 0.5196, + "step": 11604, + "teacher_loss": 0.4144832491874695 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.5902531147003174, + "learning_rate": 2.6376980126193256e-05, + "loss": 0.3088, + "step": 11605, + "teacher_loss": 0.27752721309661865 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.5012961030006409, + "learning_rate": 2.637549979193528e-05, + "loss": 0.2773, + "step": 11606, + "teacher_loss": 0.25240397453308105 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.3962011933326721, + "learning_rate": 2.6374019196871193e-05, + "loss": 0.259, + "step": 11607, + "teacher_loss": 0.2437729835510254 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.5605190992355347, + "learning_rate": 2.6372538341034952e-05, + "loss": 0.2608, + "step": 11608, + "teacher_loss": 0.22749318182468414 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.49389132857322693, + "learning_rate": 2.6371057224460497e-05, + "loss": 0.1969, + "step": 11609, + "teacher_loss": 0.16388659179210663 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.22084566950798035, + "learning_rate": 2.6369575847181795e-05, + "loss": 0.2625, + "step": 11610, + "teacher_loss": 0.26718103885650635 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.46870607137680054, + "learning_rate": 2.636809420923281e-05, + "loss": 0.1855, + "step": 11611, + "teacher_loss": 0.15398085117340088 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.36501044034957886, + "learning_rate": 2.6366612310647503e-05, + "loss": 0.4166, + "step": 11612, + "teacher_loss": 0.42231959104537964 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 1.0721452236175537, + "learning_rate": 2.6365130151459857e-05, + "loss": 0.4061, + "step": 11613, + "teacher_loss": 0.33210888504981995 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.6533260345458984, + "learning_rate": 2.636364773170385e-05, + "loss": 0.2743, + "step": 11614, + "teacher_loss": 0.23213700950145721 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.7723872661590576, + "learning_rate": 2.636216505141347e-05, + "loss": 0.3336, + "step": 11615, + "teacher_loss": 0.2848455309867859 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.4633684754371643, + "learning_rate": 2.6360682110622714e-05, + "loss": 0.3084, + "step": 11616, + "teacher_loss": 0.29114609956741333 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.13535670936107635, + "learning_rate": 2.6359198909365578e-05, + "loss": 0.1491, + "step": 11617, + "teacher_loss": 0.15066124498844147 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.6626337170600891, + "learning_rate": 2.6357715447676063e-05, + "loss": 0.3263, + "step": 11618, + "teacher_loss": 0.28890860080718994 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.2539743483066559, + "learning_rate": 2.635623172558819e-05, + "loss": 0.1915, + "step": 11619, + "teacher_loss": 0.18451187014579773 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.5255005359649658, + "learning_rate": 2.635474774313597e-05, + "loss": 0.3201, + "step": 11620, + "teacher_loss": 0.2972955107688904 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.6569070816040039, + "learning_rate": 2.6353263500353427e-05, + "loss": 0.3822, + "step": 11621, + "teacher_loss": 0.35166114568710327 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.5396597385406494, + "learning_rate": 2.635177899727459e-05, + "loss": 0.5363, + "step": 11622, + "teacher_loss": 0.5358933806419373 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.7170656323432922, + "learning_rate": 2.6350294233933493e-05, + "loss": 0.3642, + "step": 11623, + "teacher_loss": 0.32500770688056946 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.779828667640686, + "learning_rate": 2.6348809210364185e-05, + "loss": 0.4942, + "step": 11624, + "teacher_loss": 0.462502121925354 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.5123329758644104, + "learning_rate": 2.6347323926600702e-05, + "loss": 0.2371, + "step": 11625, + "teacher_loss": 0.20656032860279083 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.8832279443740845, + "learning_rate": 2.6345838382677107e-05, + "loss": 0.3167, + "step": 11626, + "teacher_loss": 0.25376179814338684 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.13777241110801697, + "learning_rate": 2.634435257862745e-05, + "loss": 0.1507, + "step": 11627, + "teacher_loss": 0.15217572450637817 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.5983982682228088, + "learning_rate": 2.63428665144858e-05, + "loss": 0.3114, + "step": 11628, + "teacher_loss": 0.27953118085861206 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.34603428840637207, + "learning_rate": 2.6341380190286233e-05, + "loss": 0.2081, + "step": 11629, + "teacher_loss": 0.19276206195354462 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.3325658440589905, + "learning_rate": 2.6339893606062823e-05, + "loss": 0.1922, + "step": 11630, + "teacher_loss": 0.17658621072769165 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.2541870176792145, + "learning_rate": 2.6338406761849647e-05, + "loss": 0.2944, + "step": 11631, + "teacher_loss": 0.2988821864128113 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.20130598545074463, + "learning_rate": 2.63369196576808e-05, + "loss": 0.3598, + "step": 11632, + "teacher_loss": 0.3774341940879822 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.25852763652801514, + "learning_rate": 2.6335432293590377e-05, + "loss": 0.1703, + "step": 11633, + "teacher_loss": 0.16053098440170288 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.7965031862258911, + "learning_rate": 2.633394466961247e-05, + "loss": 0.2626, + "step": 11634, + "teacher_loss": 0.20329627394676208 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.6755613088607788, + "learning_rate": 2.6332456785781198e-05, + "loss": 0.2316, + "step": 11635, + "teacher_loss": 0.1823037564754486 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.4504447281360626, + "learning_rate": 2.6330968642130665e-05, + "loss": 0.2945, + "step": 11636, + "teacher_loss": 0.2771519124507904 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.3466416299343109, + "learning_rate": 2.6329480238694997e-05, + "loss": 0.2082, + "step": 11637, + "teacher_loss": 0.19284410774707794 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.4657096266746521, + "learning_rate": 2.6327991575508314e-05, + "loss": 0.2554, + "step": 11638, + "teacher_loss": 0.23197926580905914 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.3914950489997864, + "learning_rate": 2.6326502652604745e-05, + "loss": 0.2653, + "step": 11639, + "teacher_loss": 0.2512850761413574 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.4722428023815155, + "learning_rate": 2.632501347001843e-05, + "loss": 0.1823, + "step": 11640, + "teacher_loss": 0.15013083815574646 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.6649363040924072, + "learning_rate": 2.6323524027783513e-05, + "loss": 0.3725, + "step": 11641, + "teacher_loss": 0.3400045931339264 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.39294326305389404, + "learning_rate": 2.6322034325934134e-05, + "loss": 0.2827, + "step": 11642, + "teacher_loss": 0.27043092250823975 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.6809706687927246, + "learning_rate": 2.6320544364504457e-05, + "loss": 0.2533, + "step": 11643, + "teacher_loss": 0.20577149093151093 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.5013149380683899, + "learning_rate": 2.6319054143528633e-05, + "loss": 0.2624, + "step": 11644, + "teacher_loss": 0.23586280643939972 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.5766116976737976, + "learning_rate": 2.6317563663040842e-05, + "loss": 0.1847, + "step": 11645, + "teacher_loss": 0.14115270972251892 + }, + { + "compression_loss": 0.0, + "epoch": 2.1, + "label_loss": 0.39663460850715637, + "learning_rate": 2.6316072923075246e-05, + "loss": 0.4255, + "step": 11646, + "teacher_loss": 0.42867511510849 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.41966313123703003, + "learning_rate": 2.6314581923666022e-05, + "loss": 0.2703, + "step": 11647, + "teacher_loss": 0.25365710258483887 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.6521139740943909, + "learning_rate": 2.6313090664847358e-05, + "loss": 0.3639, + "step": 11648, + "teacher_loss": 0.3318377137184143 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.8480049967765808, + "learning_rate": 2.6311599146653446e-05, + "loss": 0.3856, + "step": 11649, + "teacher_loss": 0.3342652916908264 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.7971171140670776, + "learning_rate": 2.6310107369118476e-05, + "loss": 0.3437, + "step": 11650, + "teacher_loss": 0.29330623149871826 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.4783957004547119, + "learning_rate": 2.6308615332276658e-05, + "loss": 0.2556, + "step": 11651, + "teacher_loss": 0.23084627091884613 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.6096468567848206, + "learning_rate": 2.6307123036162192e-05, + "loss": 0.3317, + "step": 11652, + "teacher_loss": 0.30079346895217896 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.19338107109069824, + "learning_rate": 2.6305630480809294e-05, + "loss": 0.2232, + "step": 11653, + "teacher_loss": 0.22650833427906036 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.5757306814193726, + "learning_rate": 2.6304137666252185e-05, + "loss": 0.2733, + "step": 11654, + "teacher_loss": 0.2396695613861084 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.24960431456565857, + "learning_rate": 2.6302644592525098e-05, + "loss": 0.3587, + "step": 11655, + "teacher_loss": 0.3707889914512634 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.4055192470550537, + "learning_rate": 2.630115125966225e-05, + "loss": 0.2862, + "step": 11656, + "teacher_loss": 0.27299222350120544 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.34686267375946045, + "learning_rate": 2.629965766769789e-05, + "loss": 0.2228, + "step": 11657, + "teacher_loss": 0.20901940762996674 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.20489606261253357, + "learning_rate": 2.6298163816666258e-05, + "loss": 0.222, + "step": 11658, + "teacher_loss": 0.2238956093788147 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.369179904460907, + "learning_rate": 2.62966697066016e-05, + "loss": 0.3928, + "step": 11659, + "teacher_loss": 0.39540189504623413 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.3309842050075531, + "learning_rate": 2.6295175337538178e-05, + "loss": 0.2053, + "step": 11660, + "teacher_loss": 0.19132784008979797 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 1.2091197967529297, + "learning_rate": 2.6293680709510247e-05, + "loss": 0.4527, + "step": 11661, + "teacher_loss": 0.3685988187789917 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.5470733046531677, + "learning_rate": 2.629218582255208e-05, + "loss": 0.4332, + "step": 11662, + "teacher_loss": 0.4206019639968872 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.802361011505127, + "learning_rate": 2.629069067669795e-05, + "loss": 0.514, + "step": 11663, + "teacher_loss": 0.48192915320396423 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.42300403118133545, + "learning_rate": 2.628919527198213e-05, + "loss": 0.2187, + "step": 11664, + "teacher_loss": 0.19602477550506592 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.329264760017395, + "learning_rate": 2.628769960843891e-05, + "loss": 0.2949, + "step": 11665, + "teacher_loss": 0.29107093811035156 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.7769129872322083, + "learning_rate": 2.628620368610258e-05, + "loss": 0.3332, + "step": 11666, + "teacher_loss": 0.2838786542415619 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.796055793762207, + "learning_rate": 2.6284707505007442e-05, + "loss": 0.2779, + "step": 11667, + "teacher_loss": 0.22037874162197113 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.42795711755752563, + "learning_rate": 2.6283211065187787e-05, + "loss": 0.334, + "step": 11668, + "teacher_loss": 0.3235991597175598 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.78924560546875, + "learning_rate": 2.6281714366677935e-05, + "loss": 0.3358, + "step": 11669, + "teacher_loss": 0.28545546531677246 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.5016790628433228, + "learning_rate": 2.6280217409512196e-05, + "loss": 0.2291, + "step": 11670, + "teacher_loss": 0.19881504774093628 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.4088430404663086, + "learning_rate": 2.6278720193724893e-05, + "loss": 0.2201, + "step": 11671, + "teacher_loss": 0.19909584522247314 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.49580615758895874, + "learning_rate": 2.627722271935035e-05, + "loss": 0.2706, + "step": 11672, + "teacher_loss": 0.24553239345550537 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.23618587851524353, + "learning_rate": 2.62757249864229e-05, + "loss": 0.2304, + "step": 11673, + "teacher_loss": 0.22970931231975555 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.2461511194705963, + "learning_rate": 2.6274226994976885e-05, + "loss": 0.1661, + "step": 11674, + "teacher_loss": 0.15725934505462646 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.850604236125946, + "learning_rate": 2.6272728745046647e-05, + "loss": 0.3856, + "step": 11675, + "teacher_loss": 0.333961546421051 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.28365811705589294, + "learning_rate": 2.6271230236666534e-05, + "loss": 0.2551, + "step": 11676, + "teacher_loss": 0.2519356310367584 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.5430283546447754, + "learning_rate": 2.6269731469870906e-05, + "loss": 0.2612, + "step": 11677, + "teacher_loss": 0.22983039915561676 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.7752373814582825, + "learning_rate": 2.626823244469412e-05, + "loss": 0.2978, + "step": 11678, + "teacher_loss": 0.24476996064186096 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.25934267044067383, + "learning_rate": 2.6266733161170553e-05, + "loss": 0.1979, + "step": 11679, + "teacher_loss": 0.1911078691482544 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.41404810547828674, + "learning_rate": 2.626523361933457e-05, + "loss": 0.2255, + "step": 11680, + "teacher_loss": 0.2045830339193344 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.5188248157501221, + "learning_rate": 2.626373381922056e-05, + "loss": 0.3525, + "step": 11681, + "teacher_loss": 0.3340300917625427 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.32895123958587646, + "learning_rate": 2.62622337608629e-05, + "loss": 0.2506, + "step": 11682, + "teacher_loss": 0.24193879961967468 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.45395970344543457, + "learning_rate": 2.626073344429599e-05, + "loss": 0.2051, + "step": 11683, + "teacher_loss": 0.17749321460723877 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.5270957946777344, + "learning_rate": 2.625923286955422e-05, + "loss": 0.2872, + "step": 11684, + "teacher_loss": 0.2605031728744507 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.456696093082428, + "learning_rate": 2.6257732036671995e-05, + "loss": 0.2282, + "step": 11685, + "teacher_loss": 0.20284131169319153 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.8772329092025757, + "learning_rate": 2.625623094568373e-05, + "loss": 0.3602, + "step": 11686, + "teacher_loss": 0.3027776777744293 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.4781636595726013, + "learning_rate": 2.6254729596623835e-05, + "loss": 0.4559, + "step": 11687, + "teacher_loss": 0.45338141918182373 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.2834071218967438, + "learning_rate": 2.6253227989526737e-05, + "loss": 0.2087, + "step": 11688, + "teacher_loss": 0.20041434466838837 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.7103962302207947, + "learning_rate": 2.625172612442686e-05, + "loss": 0.2636, + "step": 11689, + "teacher_loss": 0.2139425426721573 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 1.012774109840393, + "learning_rate": 2.6250224001358635e-05, + "loss": 0.4744, + "step": 11690, + "teacher_loss": 0.41453754901885986 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.36150819063186646, + "learning_rate": 2.6248721620356504e-05, + "loss": 0.2668, + "step": 11691, + "teacher_loss": 0.2562660276889801 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.8701244592666626, + "learning_rate": 2.6247218981454915e-05, + "loss": 0.3206, + "step": 11692, + "teacher_loss": 0.2595844864845276 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.6041348576545715, + "learning_rate": 2.6245716084688315e-05, + "loss": 0.4924, + "step": 11693, + "teacher_loss": 0.4800390601158142 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.676291286945343, + "learning_rate": 2.6244212930091156e-05, + "loss": 0.2538, + "step": 11694, + "teacher_loss": 0.20687994360923767 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.580389142036438, + "learning_rate": 2.6242709517697908e-05, + "loss": 0.3332, + "step": 11695, + "teacher_loss": 0.3057156801223755 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.29815980792045593, + "learning_rate": 2.6241205847543047e-05, + "loss": 0.2674, + "step": 11696, + "teacher_loss": 0.26397261023521423 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.8606027364730835, + "learning_rate": 2.623970191966103e-05, + "loss": 0.3569, + "step": 11697, + "teacher_loss": 0.3008785843849182 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.4508809447288513, + "learning_rate": 2.6238197734086346e-05, + "loss": 0.2334, + "step": 11698, + "teacher_loss": 0.2092401683330536 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.2732694149017334, + "learning_rate": 2.6236693290853486e-05, + "loss": 0.1813, + "step": 11699, + "teacher_loss": 0.17109918594360352 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.3855234384536743, + "learning_rate": 2.6235188589996933e-05, + "loss": 0.3228, + "step": 11700, + "teacher_loss": 0.31583231687545776 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.5695276260375977, + "learning_rate": 2.6233683631551197e-05, + "loss": 0.3078, + "step": 11701, + "teacher_loss": 0.2787608504295349 + }, + { + "compression_loss": 0.0, + "epoch": 2.11, + "label_loss": 0.8114451169967651, + "learning_rate": 2.623217841555077e-05, + "loss": 0.3951, + "step": 11702, + "teacher_loss": 0.34884822368621826 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.3266856074333191, + "learning_rate": 2.6230672942030172e-05, + "loss": 0.285, + "step": 11703, + "teacher_loss": 0.2803923189640045 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.38466620445251465, + "learning_rate": 2.6229167211023913e-05, + "loss": 0.1891, + "step": 11704, + "teacher_loss": 0.1673886775970459 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.21358650922775269, + "learning_rate": 2.6227661222566516e-05, + "loss": 0.2028, + "step": 11705, + "teacher_loss": 0.20161615312099457 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.21007519960403442, + "learning_rate": 2.6226154976692513e-05, + "loss": 0.1669, + "step": 11706, + "teacher_loss": 0.16212603449821472 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.44750118255615234, + "learning_rate": 2.6224648473436432e-05, + "loss": 0.4428, + "step": 11707, + "teacher_loss": 0.44226568937301636 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.4854198098182678, + "learning_rate": 2.6223141712832813e-05, + "loss": 0.2205, + "step": 11708, + "teacher_loss": 0.19101554155349731 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.40673476457595825, + "learning_rate": 2.62216346949162e-05, + "loss": 0.1805, + "step": 11709, + "teacher_loss": 0.15531401336193085 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.2709745168685913, + "learning_rate": 2.6220127419721157e-05, + "loss": 0.2087, + "step": 11710, + "teacher_loss": 0.20177140831947327 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.5460803508758545, + "learning_rate": 2.6218619887282227e-05, + "loss": 0.2401, + "step": 11711, + "teacher_loss": 0.20607876777648926 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.32501888275146484, + "learning_rate": 2.6217112097633977e-05, + "loss": 0.2582, + "step": 11712, + "teacher_loss": 0.2508161664009094 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.27705562114715576, + "learning_rate": 2.6215604050810977e-05, + "loss": 0.2049, + "step": 11713, + "teacher_loss": 0.19687145948410034 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.44990891218185425, + "learning_rate": 2.62140957468478e-05, + "loss": 0.2435, + "step": 11714, + "teacher_loss": 0.22052612900733948 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.3055342137813568, + "learning_rate": 2.6212587185779036e-05, + "loss": 0.1773, + "step": 11715, + "teacher_loss": 0.16301541030406952 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.4425654411315918, + "learning_rate": 2.6211078367639262e-05, + "loss": 0.444, + "step": 11716, + "teacher_loss": 0.44413354992866516 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.2734777629375458, + "learning_rate": 2.6209569292463074e-05, + "loss": 0.4187, + "step": 11717, + "teacher_loss": 0.43479156494140625 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.3884049654006958, + "learning_rate": 2.620805996028507e-05, + "loss": 0.2121, + "step": 11718, + "teacher_loss": 0.19250774383544922 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 1.1643576622009277, + "learning_rate": 2.6206550371139853e-05, + "loss": 0.4344, + "step": 11719, + "teacher_loss": 0.35330379009246826 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.4396713078022003, + "learning_rate": 2.6205040525062036e-05, + "loss": 0.2801, + "step": 11720, + "teacher_loss": 0.2623355984687805 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.7217541337013245, + "learning_rate": 2.6203530422086234e-05, + "loss": 0.2726, + "step": 11721, + "teacher_loss": 0.22271960973739624 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.5310426354408264, + "learning_rate": 2.620202006224707e-05, + "loss": 0.3088, + "step": 11722, + "teacher_loss": 0.2840694189071655 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.4304261803627014, + "learning_rate": 2.6200509445579167e-05, + "loss": 0.2749, + "step": 11723, + "teacher_loss": 0.257620245218277 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.4293491244316101, + "learning_rate": 2.6198998572117168e-05, + "loss": 0.2135, + "step": 11724, + "teacher_loss": 0.18951985239982605 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.5550104379653931, + "learning_rate": 2.6197487441895705e-05, + "loss": 0.2482, + "step": 11725, + "teacher_loss": 0.21409985423088074 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.348992258310318, + "learning_rate": 2.6195976054949432e-05, + "loss": 0.2874, + "step": 11726, + "teacher_loss": 0.2805972099304199 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.33255815505981445, + "learning_rate": 2.619446441131299e-05, + "loss": 0.198, + "step": 11727, + "teacher_loss": 0.1829938143491745 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.18563006818294525, + "learning_rate": 2.6192952511021045e-05, + "loss": 0.2228, + "step": 11728, + "teacher_loss": 0.2268821746110916 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.5308411121368408, + "learning_rate": 2.6191440354108257e-05, + "loss": 0.4484, + "step": 11729, + "teacher_loss": 0.4392498731613159 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.2919258773326874, + "learning_rate": 2.6189927940609297e-05, + "loss": 0.2097, + "step": 11730, + "teacher_loss": 0.2005467414855957 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.44020524621009827, + "learning_rate": 2.6188415270558833e-05, + "loss": 0.4633, + "step": 11731, + "teacher_loss": 0.4659165143966675 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.7232972979545593, + "learning_rate": 2.6186902343991556e-05, + "loss": 0.304, + "step": 11732, + "teacher_loss": 0.25739309191703796 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.32231825590133667, + "learning_rate": 2.6185389160942147e-05, + "loss": 0.1691, + "step": 11733, + "teacher_loss": 0.1520366668701172 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.3430689871311188, + "learning_rate": 2.61838757214453e-05, + "loss": 0.2391, + "step": 11734, + "teacher_loss": 0.22759392857551575 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.207975834608078, + "learning_rate": 2.6182362025535714e-05, + "loss": 0.2289, + "step": 11735, + "teacher_loss": 0.23123089969158173 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.44402527809143066, + "learning_rate": 2.6180848073248092e-05, + "loss": 0.2475, + "step": 11736, + "teacher_loss": 0.22569337487220764 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.6652697324752808, + "learning_rate": 2.6179333864617147e-05, + "loss": 0.303, + "step": 11737, + "teacher_loss": 0.262746661901474 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.13566963374614716, + "learning_rate": 2.6177819399677593e-05, + "loss": 0.2328, + "step": 11738, + "teacher_loss": 0.24359755218029022 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.36873966455459595, + "learning_rate": 2.6176304678464154e-05, + "loss": 0.2398, + "step": 11739, + "teacher_loss": 0.22551871836185455 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.4505552649497986, + "learning_rate": 2.6174789701011557e-05, + "loss": 0.3263, + "step": 11740, + "teacher_loss": 0.31244975328445435 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.4136297404766083, + "learning_rate": 2.6173274467354533e-05, + "loss": 0.3431, + "step": 11741, + "teacher_loss": 0.335308313369751 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.20684108138084412, + "learning_rate": 2.617175897752783e-05, + "loss": 0.2296, + "step": 11742, + "teacher_loss": 0.23208823800086975 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.6587156653404236, + "learning_rate": 2.6170243231566183e-05, + "loss": 0.3759, + "step": 11743, + "teacher_loss": 0.34444117546081543 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.48333656787872314, + "learning_rate": 2.6168727229504352e-05, + "loss": 0.3458, + "step": 11744, + "teacher_loss": 0.33054959774017334 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.3705843389034271, + "learning_rate": 2.6167210971377092e-05, + "loss": 0.2402, + "step": 11745, + "teacher_loss": 0.22572600841522217 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.4790026545524597, + "learning_rate": 2.6165694457219162e-05, + "loss": 0.317, + "step": 11746, + "teacher_loss": 0.29898345470428467 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.17654390633106232, + "learning_rate": 2.6164177687065337e-05, + "loss": 0.2396, + "step": 11747, + "teacher_loss": 0.2465890794992447 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.6947540044784546, + "learning_rate": 2.6162660660950386e-05, + "loss": 0.2313, + "step": 11748, + "teacher_loss": 0.17977529764175415 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.37498557567596436, + "learning_rate": 2.6161143378909096e-05, + "loss": 0.3694, + "step": 11749, + "teacher_loss": 0.36881938576698303 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 1.3527212142944336, + "learning_rate": 2.6159625840976253e-05, + "loss": 0.3997, + "step": 11750, + "teacher_loss": 0.2938268184661865 + }, + { + "epoch": 2.12, + "eval_exact_match": 79.75402081362347, + "eval_f1": 87.17598981359633, + "step": 11750 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.19763246178627014, + "learning_rate": 2.615810804718665e-05, + "loss": 0.1434, + "step": 11751, + "teacher_loss": 0.13740935921669006 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.3919205069541931, + "learning_rate": 2.6156589997575072e-05, + "loss": 0.3446, + "step": 11752, + "teacher_loss": 0.339397668838501 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.35283082723617554, + "learning_rate": 2.6155071692176348e-05, + "loss": 0.2074, + "step": 11753, + "teacher_loss": 0.19122996926307678 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.33845284581184387, + "learning_rate": 2.6153553131025268e-05, + "loss": 0.236, + "step": 11754, + "teacher_loss": 0.22462353110313416 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.5809452533721924, + "learning_rate": 2.6152034314156656e-05, + "loss": 0.2105, + "step": 11755, + "teacher_loss": 0.1693117767572403 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.654186487197876, + "learning_rate": 2.6150515241605334e-05, + "loss": 0.342, + "step": 11756, + "teacher_loss": 0.307292640209198 + }, + { + "compression_loss": 0.0, + "epoch": 2.12, + "label_loss": 0.3123341500759125, + "learning_rate": 2.6148995913406123e-05, + "loss": 0.3114, + "step": 11757, + "teacher_loss": 0.311333030462265 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.7260026335716248, + "learning_rate": 2.6147476329593867e-05, + "loss": 0.4434, + "step": 11758, + "teacher_loss": 0.41197606921195984 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.5350506901741028, + "learning_rate": 2.61459564902034e-05, + "loss": 0.2817, + "step": 11759, + "teacher_loss": 0.25350135564804077 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.6239803433418274, + "learning_rate": 2.6144436395269566e-05, + "loss": 0.2973, + "step": 11760, + "teacher_loss": 0.26102250814437866 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.47594326734542847, + "learning_rate": 2.614291604482722e-05, + "loss": 0.38, + "step": 11761, + "teacher_loss": 0.36938387155532837 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.5479391813278198, + "learning_rate": 2.6141395438911216e-05, + "loss": 0.233, + "step": 11762, + "teacher_loss": 0.19797459244728088 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.290363073348999, + "learning_rate": 2.613987457755642e-05, + "loss": 0.2177, + "step": 11763, + "teacher_loss": 0.20961186289787292 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.6466919183731079, + "learning_rate": 2.6138353460797695e-05, + "loss": 0.3123, + "step": 11764, + "teacher_loss": 0.2751414179801941 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.7366952896118164, + "learning_rate": 2.6136832088669927e-05, + "loss": 0.3022, + "step": 11765, + "teacher_loss": 0.25387638807296753 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.7886039018630981, + "learning_rate": 2.6135310461207984e-05, + "loss": 0.6488, + "step": 11766, + "teacher_loss": 0.6333101391792297 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.31554722785949707, + "learning_rate": 2.613378857844676e-05, + "loss": 0.2088, + "step": 11767, + "teacher_loss": 0.19695937633514404 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.45015841722488403, + "learning_rate": 2.613226644042114e-05, + "loss": 0.2931, + "step": 11768, + "teacher_loss": 0.27564364671707153 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.4002137780189514, + "learning_rate": 2.6130744047166034e-05, + "loss": 0.255, + "step": 11769, + "teacher_loss": 0.23888307809829712 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.3668873906135559, + "learning_rate": 2.6129221398716333e-05, + "loss": 0.2605, + "step": 11770, + "teacher_loss": 0.24871167540550232 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 1.1099088191986084, + "learning_rate": 2.6127698495106955e-05, + "loss": 0.514, + "step": 11771, + "teacher_loss": 0.4477843940258026 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.4650160074234009, + "learning_rate": 2.6126175336372808e-05, + "loss": 0.3301, + "step": 11772, + "teacher_loss": 0.3151111304759979 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.3635333478450775, + "learning_rate": 2.6124651922548825e-05, + "loss": 0.2498, + "step": 11773, + "teacher_loss": 0.23721134662628174 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.3595234751701355, + "learning_rate": 2.6123128253669926e-05, + "loss": 0.3037, + "step": 11774, + "teacher_loss": 0.29753702878952026 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.4180101752281189, + "learning_rate": 2.6121604329771043e-05, + "loss": 0.2116, + "step": 11775, + "teacher_loss": 0.18871405720710754 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.3541082739830017, + "learning_rate": 2.6120080150887118e-05, + "loss": 0.2568, + "step": 11776, + "teacher_loss": 0.24599777162075043 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.5516923069953918, + "learning_rate": 2.6118555717053097e-05, + "loss": 0.325, + "step": 11777, + "teacher_loss": 0.29986101388931274 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.33248043060302734, + "learning_rate": 2.611703102830392e-05, + "loss": 0.2227, + "step": 11778, + "teacher_loss": 0.21045032143592834 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.14766092598438263, + "learning_rate": 2.6115506084674564e-05, + "loss": 0.2137, + "step": 11779, + "teacher_loss": 0.22106239199638367 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.2766382694244385, + "learning_rate": 2.6113980886199974e-05, + "loss": 0.1942, + "step": 11780, + "teacher_loss": 0.1850774884223938 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.7963391542434692, + "learning_rate": 2.6112455432915122e-05, + "loss": 0.278, + "step": 11781, + "teacher_loss": 0.22037044167518616 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.7411714792251587, + "learning_rate": 2.6110929724854987e-05, + "loss": 0.2547, + "step": 11782, + "teacher_loss": 0.20061588287353516 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.3920944333076477, + "learning_rate": 2.610940376205454e-05, + "loss": 0.2193, + "step": 11783, + "teacher_loss": 0.20012560486793518 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.570035457611084, + "learning_rate": 2.6107877544548776e-05, + "loss": 0.2094, + "step": 11784, + "teacher_loss": 0.16934117674827576 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.8104901313781738, + "learning_rate": 2.6106351072372688e-05, + "loss": 0.5458, + "step": 11785, + "teacher_loss": 0.5163378715515137 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.3749673068523407, + "learning_rate": 2.6104824345561264e-05, + "loss": 0.2612, + "step": 11786, + "teacher_loss": 0.2485107183456421 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.43200820684432983, + "learning_rate": 2.6103297364149506e-05, + "loss": 0.3293, + "step": 11787, + "teacher_loss": 0.3178657591342926 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.6073095798492432, + "learning_rate": 2.610177012817244e-05, + "loss": 0.2934, + "step": 11788, + "teacher_loss": 0.25854456424713135 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.4115005135536194, + "learning_rate": 2.6100242637665064e-05, + "loss": 0.3139, + "step": 11789, + "teacher_loss": 0.3030462861061096 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.36522796750068665, + "learning_rate": 2.6098714892662405e-05, + "loss": 0.2811, + "step": 11790, + "teacher_loss": 0.2717519700527191 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.21329163014888763, + "learning_rate": 2.609718689319949e-05, + "loss": 0.2455, + "step": 11791, + "teacher_loss": 0.24907556176185608 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.3402857780456543, + "learning_rate": 2.6095658639311348e-05, + "loss": 0.2211, + "step": 11792, + "teacher_loss": 0.20783844590187073 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.5102912187576294, + "learning_rate": 2.609413013103302e-05, + "loss": 0.3836, + "step": 11793, + "teacher_loss": 0.36948350071907043 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.19637587666511536, + "learning_rate": 2.6092601368399553e-05, + "loss": 0.1778, + "step": 11794, + "teacher_loss": 0.17571675777435303 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.486598402261734, + "learning_rate": 2.6091072351445993e-05, + "loss": 0.3317, + "step": 11795, + "teacher_loss": 0.31446194648742676 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.24823614954948425, + "learning_rate": 2.6089543080207395e-05, + "loss": 0.1949, + "step": 11796, + "teacher_loss": 0.18901577591896057 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.6965166926383972, + "learning_rate": 2.6088013554718825e-05, + "loss": 0.4815, + "step": 11797, + "teacher_loss": 0.45760977268218994 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.6267510652542114, + "learning_rate": 2.6086483775015345e-05, + "loss": 0.4206, + "step": 11798, + "teacher_loss": 0.39770960807800293 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.24668660759925842, + "learning_rate": 2.6084953741132033e-05, + "loss": 0.171, + "step": 11799, + "teacher_loss": 0.16258426010608673 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.4710262417793274, + "learning_rate": 2.6083423453103966e-05, + "loss": 0.2121, + "step": 11800, + "teacher_loss": 0.18337951600551605 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.32312625646591187, + "learning_rate": 2.6081892910966228e-05, + "loss": 0.2235, + "step": 11801, + "teacher_loss": 0.21241521835327148 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.218858003616333, + "learning_rate": 2.608036211475391e-05, + "loss": 0.3959, + "step": 11802, + "teacher_loss": 0.41560009121894836 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.49183446168899536, + "learning_rate": 2.607883106450211e-05, + "loss": 0.2768, + "step": 11803, + "teacher_loss": 0.2529560625553131 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.3915698230266571, + "learning_rate": 2.607729976024593e-05, + "loss": 0.201, + "step": 11804, + "teacher_loss": 0.17986398935317993 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.44814667105674744, + "learning_rate": 2.6075768202020483e-05, + "loss": 0.2395, + "step": 11805, + "teacher_loss": 0.21630804240703583 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.2833609879016876, + "learning_rate": 2.6074236389860873e-05, + "loss": 0.372, + "step": 11806, + "teacher_loss": 0.3818890154361725 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.2659551799297333, + "learning_rate": 2.6072704323802223e-05, + "loss": 0.1641, + "step": 11807, + "teacher_loss": 0.15273050963878632 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.5055026412010193, + "learning_rate": 2.6071172003879667e-05, + "loss": 0.3614, + "step": 11808, + "teacher_loss": 0.3453558683395386 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.13391844928264618, + "learning_rate": 2.6069639430128323e-05, + "loss": 0.2053, + "step": 11809, + "teacher_loss": 0.21320462226867676 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.9493470191955566, + "learning_rate": 2.606810660258334e-05, + "loss": 0.6174, + "step": 11810, + "teacher_loss": 0.5805208683013916 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.16981101036071777, + "learning_rate": 2.6066573521279856e-05, + "loss": 0.1785, + "step": 11811, + "teacher_loss": 0.1795172393321991 + }, + { + "compression_loss": 0.0, + "epoch": 2.13, + "label_loss": 0.3450591266155243, + "learning_rate": 2.606504018625302e-05, + "loss": 0.2282, + "step": 11812, + "teacher_loss": 0.2152593731880188 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.4901118576526642, + "learning_rate": 2.6063506597537985e-05, + "loss": 0.3053, + "step": 11813, + "teacher_loss": 0.284729927778244 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.4742254912853241, + "learning_rate": 2.6061972755169916e-05, + "loss": 0.2247, + "step": 11814, + "teacher_loss": 0.19695082306861877 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.6243545413017273, + "learning_rate": 2.606043865918398e-05, + "loss": 0.2953, + "step": 11815, + "teacher_loss": 0.2587278187274933 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.5326356291770935, + "learning_rate": 2.605890430961534e-05, + "loss": 0.1761, + "step": 11816, + "teacher_loss": 0.13653619587421417 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.2755795419216156, + "learning_rate": 2.6057369706499184e-05, + "loss": 0.1885, + "step": 11817, + "teacher_loss": 0.17877671122550964 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.8960363864898682, + "learning_rate": 2.605583484987069e-05, + "loss": 0.4244, + "step": 11818, + "teacher_loss": 0.372050404548645 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.3315697908401489, + "learning_rate": 2.605429973976505e-05, + "loss": 0.388, + "step": 11819, + "teacher_loss": 0.39432376623153687 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.42155885696411133, + "learning_rate": 2.6052764376217463e-05, + "loss": 0.3903, + "step": 11820, + "teacher_loss": 0.38677978515625 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 1.0188727378845215, + "learning_rate": 2.6051228759263124e-05, + "loss": 0.2918, + "step": 11821, + "teacher_loss": 0.21098877489566803 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.3923536539077759, + "learning_rate": 2.6049692888937246e-05, + "loss": 0.2795, + "step": 11822, + "teacher_loss": 0.2670028805732727 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.3829669654369354, + "learning_rate": 2.604815676527504e-05, + "loss": 0.2397, + "step": 11823, + "teacher_loss": 0.22377602756023407 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.5543052554130554, + "learning_rate": 2.6046620388311718e-05, + "loss": 0.4596, + "step": 11824, + "teacher_loss": 0.44908061623573303 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.3174785077571869, + "learning_rate": 2.6045083758082513e-05, + "loss": 0.278, + "step": 11825, + "teacher_loss": 0.27361518144607544 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.7123744487762451, + "learning_rate": 2.604354687462265e-05, + "loss": 0.3581, + "step": 11826, + "teacher_loss": 0.31871408224105835 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.6533921360969543, + "learning_rate": 2.604200973796737e-05, + "loss": 0.2568, + "step": 11827, + "teacher_loss": 0.21268978714942932 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.5691380500793457, + "learning_rate": 2.604047234815191e-05, + "loss": 0.2173, + "step": 11828, + "teacher_loss": 0.17817473411560059 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.3701571226119995, + "learning_rate": 2.6038934705211523e-05, + "loss": 0.2193, + "step": 11829, + "teacher_loss": 0.202579528093338 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.6815152168273926, + "learning_rate": 2.603739680918146e-05, + "loss": 0.2964, + "step": 11830, + "teacher_loss": 0.25364288687705994 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.6011703014373779, + "learning_rate": 2.6035858660096975e-05, + "loss": 0.3749, + "step": 11831, + "teacher_loss": 0.34974128007888794 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.9137331247329712, + "learning_rate": 2.603432025799334e-05, + "loss": 0.3293, + "step": 11832, + "teacher_loss": 0.2643439769744873 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.7757933735847473, + "learning_rate": 2.6032781602905828e-05, + "loss": 0.2992, + "step": 11833, + "teacher_loss": 0.24629098176956177 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.30560919642448425, + "learning_rate": 2.603124269486971e-05, + "loss": 0.3159, + "step": 11834, + "teacher_loss": 0.31700631976127625 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.5418651103973389, + "learning_rate": 2.6029703533920267e-05, + "loss": 0.3041, + "step": 11835, + "teacher_loss": 0.2777123749256134 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.27882617712020874, + "learning_rate": 2.6028164120092793e-05, + "loss": 0.2174, + "step": 11836, + "teacher_loss": 0.21056890487670898 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.3406464159488678, + "learning_rate": 2.6026624453422583e-05, + "loss": 0.2651, + "step": 11837, + "teacher_loss": 0.25672852993011475 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.3227473795413971, + "learning_rate": 2.602508453394493e-05, + "loss": 0.3608, + "step": 11838, + "teacher_loss": 0.3650456666946411 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.23000863194465637, + "learning_rate": 2.6023544361695147e-05, + "loss": 0.2017, + "step": 11839, + "teacher_loss": 0.19850674271583557 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.45348310470581055, + "learning_rate": 2.602200393670854e-05, + "loss": 0.2381, + "step": 11840, + "teacher_loss": 0.21413525938987732 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.2759324312210083, + "learning_rate": 2.6020463259020424e-05, + "loss": 0.306, + "step": 11841, + "teacher_loss": 0.30939337611198425 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.7710205316543579, + "learning_rate": 2.601892232866613e-05, + "loss": 0.3008, + "step": 11842, + "teacher_loss": 0.24857556819915771 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.2691395878791809, + "learning_rate": 2.6017381145680984e-05, + "loss": 0.1745, + "step": 11843, + "teacher_loss": 0.16393698751926422 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.31048572063446045, + "learning_rate": 2.601583971010032e-05, + "loss": 0.3619, + "step": 11844, + "teacher_loss": 0.3676021695137024 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.8053724765777588, + "learning_rate": 2.6014298021959482e-05, + "loss": 0.3691, + "step": 11845, + "teacher_loss": 0.320635586977005 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.2519734501838684, + "learning_rate": 2.6012756081293807e-05, + "loss": 0.1334, + "step": 11846, + "teacher_loss": 0.12018124759197235 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.4166175127029419, + "learning_rate": 2.6011213888138658e-05, + "loss": 0.3661, + "step": 11847, + "teacher_loss": 0.3604452610015869 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.6790676116943359, + "learning_rate": 2.6009671442529385e-05, + "loss": 0.271, + "step": 11848, + "teacher_loss": 0.22569167613983154 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.5332669019699097, + "learning_rate": 2.6008128744501353e-05, + "loss": 0.496, + "step": 11849, + "teacher_loss": 0.4918893575668335 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.17910028994083405, + "learning_rate": 2.6006585794089937e-05, + "loss": 0.1583, + "step": 11850, + "teacher_loss": 0.1560365855693817 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.35121622681617737, + "learning_rate": 2.6005042591330506e-05, + "loss": 0.2388, + "step": 11851, + "teacher_loss": 0.22635364532470703 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.24981184303760529, + "learning_rate": 2.6003499136258446e-05, + "loss": 0.2528, + "step": 11852, + "teacher_loss": 0.25310081243515015 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.4699033200740814, + "learning_rate": 2.6001955428909136e-05, + "loss": 0.287, + "step": 11853, + "teacher_loss": 0.2666309177875519 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.5475835800170898, + "learning_rate": 2.600041146931798e-05, + "loss": 0.2037, + "step": 11854, + "teacher_loss": 0.16545702517032623 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.6502259373664856, + "learning_rate": 2.5998867257520363e-05, + "loss": 0.3642, + "step": 11855, + "teacher_loss": 0.33246415853500366 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.14808988571166992, + "learning_rate": 2.59973227935517e-05, + "loss": 0.2296, + "step": 11856, + "teacher_loss": 0.2386157363653183 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.4042413830757141, + "learning_rate": 2.5995778077447393e-05, + "loss": 0.4354, + "step": 11857, + "teacher_loss": 0.4388611316680908 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.18771639466285706, + "learning_rate": 2.599423310924287e-05, + "loss": 0.2521, + "step": 11858, + "teacher_loss": 0.2592393159866333 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.2643285393714905, + "learning_rate": 2.599268788897354e-05, + "loss": 0.2295, + "step": 11859, + "teacher_loss": 0.2256062626838684 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.5425845980644226, + "learning_rate": 2.599114241667483e-05, + "loss": 0.4539, + "step": 11860, + "teacher_loss": 0.44408178329467773 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.40210098028182983, + "learning_rate": 2.5989596692382182e-05, + "loss": 0.2596, + "step": 11861, + "teacher_loss": 0.24373117089271545 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.6213204860687256, + "learning_rate": 2.598805071613103e-05, + "loss": 0.3646, + "step": 11862, + "teacher_loss": 0.3361297845840454 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 1.0089659690856934, + "learning_rate": 2.598650448795682e-05, + "loss": 0.4561, + "step": 11863, + "teacher_loss": 0.3946387767791748 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.3167431950569153, + "learning_rate": 2.5984958007894995e-05, + "loss": 0.2141, + "step": 11864, + "teacher_loss": 0.20265866816043854 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.31868553161621094, + "learning_rate": 2.598341127598103e-05, + "loss": 0.3728, + "step": 11865, + "teacher_loss": 0.37881243228912354 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.17840471863746643, + "learning_rate": 2.5981864292250362e-05, + "loss": 0.2437, + "step": 11866, + "teacher_loss": 0.2510058879852295 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.5543650388717651, + "learning_rate": 2.5980317056738477e-05, + "loss": 0.3757, + "step": 11867, + "teacher_loss": 0.35587215423583984 + }, + { + "compression_loss": 0.0, + "epoch": 2.14, + "label_loss": 0.4384593963623047, + "learning_rate": 2.597876956948084e-05, + "loss": 0.2459, + "step": 11868, + "teacher_loss": 0.2244565188884735 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.573606014251709, + "learning_rate": 2.5977221830512935e-05, + "loss": 0.237, + "step": 11869, + "teacher_loss": 0.19965162873268127 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.5061002969741821, + "learning_rate": 2.5975673839870246e-05, + "loss": 0.3205, + "step": 11870, + "teacher_loss": 0.29989537596702576 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.4440218210220337, + "learning_rate": 2.597412559758826e-05, + "loss": 0.3027, + "step": 11871, + "teacher_loss": 0.2870480418205261 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.15619751811027527, + "learning_rate": 2.5972577103702477e-05, + "loss": 0.1645, + "step": 11872, + "teacher_loss": 0.16544204950332642 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.4934988021850586, + "learning_rate": 2.5971028358248396e-05, + "loss": 0.2573, + "step": 11873, + "teacher_loss": 0.2310059368610382 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.9463531374931335, + "learning_rate": 2.5969479361261533e-05, + "loss": 0.6353, + "step": 11874, + "teacher_loss": 0.6007082462310791 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.8638978600502014, + "learning_rate": 2.5967930112777393e-05, + "loss": 0.2894, + "step": 11875, + "teacher_loss": 0.22558185458183289 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.3971202075481415, + "learning_rate": 2.5966380612831496e-05, + "loss": 0.2495, + "step": 11876, + "teacher_loss": 0.23309175670146942 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.6802642345428467, + "learning_rate": 2.596483086145938e-05, + "loss": 0.4248, + "step": 11877, + "teacher_loss": 0.3964252471923828 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.7412111163139343, + "learning_rate": 2.5963280858696558e-05, + "loss": 0.2801, + "step": 11878, + "teacher_loss": 0.22889423370361328 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.7711864113807678, + "learning_rate": 2.5961730604578583e-05, + "loss": 0.3508, + "step": 11879, + "teacher_loss": 0.30411767959594727 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.28763967752456665, + "learning_rate": 2.596018009914098e-05, + "loss": 0.3535, + "step": 11880, + "teacher_loss": 0.36086538434028625 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.4015776515007019, + "learning_rate": 2.5958629342419315e-05, + "loss": 0.233, + "step": 11881, + "teacher_loss": 0.21422609686851501 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 1.0421007871627808, + "learning_rate": 2.5957078334449132e-05, + "loss": 0.3312, + "step": 11882, + "teacher_loss": 0.25220659375190735 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.259662389755249, + "learning_rate": 2.5955527075265995e-05, + "loss": 0.2113, + "step": 11883, + "teacher_loss": 0.2059817612171173 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.5402747392654419, + "learning_rate": 2.595397556490547e-05, + "loss": 0.2503, + "step": 11884, + "teacher_loss": 0.2180628627538681 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.26193585991859436, + "learning_rate": 2.5952423803403126e-05, + "loss": 0.222, + "step": 11885, + "teacher_loss": 0.21754762530326843 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.23677082359790802, + "learning_rate": 2.5950871790794537e-05, + "loss": 0.1911, + "step": 11886, + "teacher_loss": 0.18603497743606567 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.7284072637557983, + "learning_rate": 2.5949319527115292e-05, + "loss": 0.3807, + "step": 11887, + "teacher_loss": 0.34205758571624756 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.7798953652381897, + "learning_rate": 2.5947767012400985e-05, + "loss": 0.3782, + "step": 11888, + "teacher_loss": 0.3336077928543091 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.5711961984634399, + "learning_rate": 2.59462142466872e-05, + "loss": 0.3253, + "step": 11889, + "teacher_loss": 0.2979753017425537 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.7904107570648193, + "learning_rate": 2.5944661230009533e-05, + "loss": 0.3771, + "step": 11890, + "teacher_loss": 0.33119094371795654 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.5503579378128052, + "learning_rate": 2.5943107962403603e-05, + "loss": 0.28, + "step": 11891, + "teacher_loss": 0.24999725818634033 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.5006875991821289, + "learning_rate": 2.594155444390502e-05, + "loss": 0.2232, + "step": 11892, + "teacher_loss": 0.19231688976287842 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.4584958553314209, + "learning_rate": 2.5940000674549398e-05, + "loss": 0.2258, + "step": 11893, + "teacher_loss": 0.1999293863773346 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.4369381070137024, + "learning_rate": 2.5938446654372357e-05, + "loss": 0.2421, + "step": 11894, + "teacher_loss": 0.22049680352210999 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.3314973711967468, + "learning_rate": 2.593689238340953e-05, + "loss": 0.285, + "step": 11895, + "teacher_loss": 0.27979743480682373 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.125652015209198, + "learning_rate": 2.5935337861696548e-05, + "loss": 0.2056, + "step": 11896, + "teacher_loss": 0.21443699300289154 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.29903966188430786, + "learning_rate": 2.5933783089269062e-05, + "loss": 0.1968, + "step": 11897, + "teacher_loss": 0.1854916512966156 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.5764154195785522, + "learning_rate": 2.5932228066162704e-05, + "loss": 0.2746, + "step": 11898, + "teacher_loss": 0.24106967449188232 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.5139930248260498, + "learning_rate": 2.5930672792413138e-05, + "loss": 0.2555, + "step": 11899, + "teacher_loss": 0.22674795985221863 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.20115339756011963, + "learning_rate": 2.5929117268056016e-05, + "loss": 0.2646, + "step": 11900, + "teacher_loss": 0.27169069647789 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.5948539972305298, + "learning_rate": 2.5927561493127e-05, + "loss": 0.2505, + "step": 11901, + "teacher_loss": 0.21224600076675415 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.3971590995788574, + "learning_rate": 2.5926005467661763e-05, + "loss": 0.2606, + "step": 11902, + "teacher_loss": 0.24547350406646729 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.5255863070487976, + "learning_rate": 2.5924449191695976e-05, + "loss": 0.351, + "step": 11903, + "teacher_loss": 0.33161526918411255 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.7192397117614746, + "learning_rate": 2.5922892665265325e-05, + "loss": 0.3365, + "step": 11904, + "teacher_loss": 0.29401519894599915 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.5191932916641235, + "learning_rate": 2.5921335888405493e-05, + "loss": 0.2447, + "step": 11905, + "teacher_loss": 0.21425354480743408 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.1840202510356903, + "learning_rate": 2.5919778861152172e-05, + "loss": 0.2149, + "step": 11906, + "teacher_loss": 0.21835049986839294 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.753964900970459, + "learning_rate": 2.5918221583541065e-05, + "loss": 0.5318, + "step": 11907, + "teacher_loss": 0.5071197748184204 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.2728998363018036, + "learning_rate": 2.5916664055607866e-05, + "loss": 0.2123, + "step": 11908, + "teacher_loss": 0.2055220901966095 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.38073262572288513, + "learning_rate": 2.5915106277388293e-05, + "loss": 0.321, + "step": 11909, + "teacher_loss": 0.3143864870071411 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.5783706307411194, + "learning_rate": 2.591354824891806e-05, + "loss": 0.2702, + "step": 11910, + "teacher_loss": 0.23591876029968262 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.3509276807308197, + "learning_rate": 2.591198997023288e-05, + "loss": 0.1896, + "step": 11911, + "teacher_loss": 0.17167195677757263 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.17924711108207703, + "learning_rate": 2.5910431441368493e-05, + "loss": 0.1613, + "step": 11912, + "teacher_loss": 0.15934190154075623 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.2755981683731079, + "learning_rate": 2.5908872662360617e-05, + "loss": 0.248, + "step": 11913, + "teacher_loss": 0.24497190117835999 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.7753876447677612, + "learning_rate": 2.5907313633245007e-05, + "loss": 0.2482, + "step": 11914, + "teacher_loss": 0.18959349393844604 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.6539809703826904, + "learning_rate": 2.590575435405739e-05, + "loss": 0.3603, + "step": 11915, + "teacher_loss": 0.3276401162147522 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.43185028433799744, + "learning_rate": 2.5904194824833524e-05, + "loss": 0.3145, + "step": 11916, + "teacher_loss": 0.3014155328273773 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.3070920407772064, + "learning_rate": 2.5902635045609164e-05, + "loss": 0.2265, + "step": 11917, + "teacher_loss": 0.21750569343566895 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.5546842813491821, + "learning_rate": 2.590107501642007e-05, + "loss": 0.2936, + "step": 11918, + "teacher_loss": 0.2646271586418152 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.22234024107456207, + "learning_rate": 2.5899514737302007e-05, + "loss": 0.241, + "step": 11919, + "teacher_loss": 0.24312138557434082 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.538908839225769, + "learning_rate": 2.589795420829075e-05, + "loss": 0.3404, + "step": 11920, + "teacher_loss": 0.31835097074508667 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.29677772521972656, + "learning_rate": 2.589639342942208e-05, + "loss": 0.2134, + "step": 11921, + "teacher_loss": 0.20415177941322327 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.6847197413444519, + "learning_rate": 2.5894832400731778e-05, + "loss": 0.2459, + "step": 11922, + "teacher_loss": 0.1971079707145691 + }, + { + "compression_loss": 0.0, + "epoch": 2.15, + "label_loss": 0.3087640404701233, + "learning_rate": 2.5893271122255625e-05, + "loss": 0.2382, + "step": 11923, + "teacher_loss": 0.23032930493354797 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.5024212598800659, + "learning_rate": 2.5891709594029437e-05, + "loss": 0.3413, + "step": 11924, + "teacher_loss": 0.32343506813049316 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.35786259174346924, + "learning_rate": 2.5890147816088994e-05, + "loss": 0.1907, + "step": 11925, + "teacher_loss": 0.17213281989097595 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.5734649896621704, + "learning_rate": 2.5888585788470116e-05, + "loss": 0.3247, + "step": 11926, + "teacher_loss": 0.297050803899765 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.4436054229736328, + "learning_rate": 2.5887023511208606e-05, + "loss": 0.4209, + "step": 11927, + "teacher_loss": 0.41833817958831787 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.47309499979019165, + "learning_rate": 2.5885460984340293e-05, + "loss": 0.4554, + "step": 11928, + "teacher_loss": 0.45345252752304077 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.19974543154239655, + "learning_rate": 2.5883898207900997e-05, + "loss": 0.1923, + "step": 11929, + "teacher_loss": 0.19141972064971924 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.46143269538879395, + "learning_rate": 2.5882335181926546e-05, + "loss": 0.2373, + "step": 11930, + "teacher_loss": 0.2124168425798416 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.250038206577301, + "learning_rate": 2.5880771906452775e-05, + "loss": 0.1986, + "step": 11931, + "teacher_loss": 0.192842036485672 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.15559667348861694, + "learning_rate": 2.5879208381515525e-05, + "loss": 0.1781, + "step": 11932, + "teacher_loss": 0.1806262731552124 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.6108611822128296, + "learning_rate": 2.587764460715065e-05, + "loss": 0.3194, + "step": 11933, + "teacher_loss": 0.2870543301105499 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.5526113510131836, + "learning_rate": 2.5876080583393993e-05, + "loss": 0.2412, + "step": 11934, + "teacher_loss": 0.20656481385231018 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.5092881321907043, + "learning_rate": 2.5874516310281414e-05, + "loss": 0.2897, + "step": 11935, + "teacher_loss": 0.2652827501296997 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.5741344094276428, + "learning_rate": 2.5872951787848784e-05, + "loss": 0.4561, + "step": 11936, + "teacher_loss": 0.4430006742477417 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.23296701908111572, + "learning_rate": 2.5871387016131965e-05, + "loss": 0.1774, + "step": 11937, + "teacher_loss": 0.17124879360198975 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.3152272701263428, + "learning_rate": 2.5869821995166834e-05, + "loss": 0.2424, + "step": 11938, + "teacher_loss": 0.23431754112243652 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.268160879611969, + "learning_rate": 2.586825672498928e-05, + "loss": 0.318, + "step": 11939, + "teacher_loss": 0.32356932759284973 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.42559123039245605, + "learning_rate": 2.586669120563518e-05, + "loss": 0.2329, + "step": 11940, + "teacher_loss": 0.21149706840515137 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.506427526473999, + "learning_rate": 2.5865125437140432e-05, + "loss": 0.3253, + "step": 11941, + "teacher_loss": 0.3051733374595642 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.5129289031028748, + "learning_rate": 2.5863559419540933e-05, + "loss": 0.2933, + "step": 11942, + "teacher_loss": 0.26892921328544617 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.1196107342839241, + "learning_rate": 2.586199315287259e-05, + "loss": 0.2705, + "step": 11943, + "teacher_loss": 0.2872978448867798 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.6892185211181641, + "learning_rate": 2.5860426637171307e-05, + "loss": 0.3259, + "step": 11944, + "teacher_loss": 0.28552478551864624 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.20203818380832672, + "learning_rate": 2.5858859872473e-05, + "loss": 0.1576, + "step": 11945, + "teacher_loss": 0.15264225006103516 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.23604559898376465, + "learning_rate": 2.58572928588136e-05, + "loss": 0.1924, + "step": 11946, + "teacher_loss": 0.18756867945194244 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.38689279556274414, + "learning_rate": 2.5855725596229018e-05, + "loss": 0.2158, + "step": 11947, + "teacher_loss": 0.19682970643043518 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.7924243211746216, + "learning_rate": 2.5854158084755197e-05, + "loss": 0.2519, + "step": 11948, + "teacher_loss": 0.19183319807052612 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.5708785057067871, + "learning_rate": 2.585259032442808e-05, + "loss": 0.3662, + "step": 11949, + "teacher_loss": 0.34349310398101807 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.7700195908546448, + "learning_rate": 2.58510223152836e-05, + "loss": 0.3334, + "step": 11950, + "teacher_loss": 0.2849034368991852 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.17075885832309723, + "learning_rate": 2.584945405735771e-05, + "loss": 0.2494, + "step": 11951, + "teacher_loss": 0.2581344544887543 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.5903782844543457, + "learning_rate": 2.5847885550686364e-05, + "loss": 0.3831, + "step": 11952, + "teacher_loss": 0.360016793012619 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.8014289140701294, + "learning_rate": 2.5846316795305533e-05, + "loss": 0.3588, + "step": 11953, + "teacher_loss": 0.30960702896118164 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.2461264729499817, + "learning_rate": 2.5844747791251174e-05, + "loss": 0.3334, + "step": 11954, + "teacher_loss": 0.34310024976730347 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.34152668714523315, + "learning_rate": 2.5843178538559262e-05, + "loss": 0.2386, + "step": 11955, + "teacher_loss": 0.22713598608970642 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.6526594161987305, + "learning_rate": 2.5841609037265778e-05, + "loss": 0.3606, + "step": 11956, + "teacher_loss": 0.3281291723251343 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.1829129457473755, + "learning_rate": 2.5840039287406702e-05, + "loss": 0.3092, + "step": 11957, + "teacher_loss": 0.3231821060180664 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.7171481847763062, + "learning_rate": 2.5838469289018025e-05, + "loss": 0.471, + "step": 11958, + "teacher_loss": 0.4436776041984558 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.5656023621559143, + "learning_rate": 2.5836899042135746e-05, + "loss": 0.2522, + "step": 11959, + "teacher_loss": 0.21734771132469177 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.808519721031189, + "learning_rate": 2.5835328546795854e-05, + "loss": 0.3398, + "step": 11960, + "teacher_loss": 0.2877105474472046 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.31676119565963745, + "learning_rate": 2.583375780303437e-05, + "loss": 0.1583, + "step": 11961, + "teacher_loss": 0.1407456398010254 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.24294674396514893, + "learning_rate": 2.5832186810887302e-05, + "loss": 0.1866, + "step": 11962, + "teacher_loss": 0.18029829859733582 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.4894581437110901, + "learning_rate": 2.5830615570390666e-05, + "loss": 0.3261, + "step": 11963, + "teacher_loss": 0.30794578790664673 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.5461469888687134, + "learning_rate": 2.5829044081580487e-05, + "loss": 0.2955, + "step": 11964, + "teacher_loss": 0.2676633298397064 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.5455873608589172, + "learning_rate": 2.5827472344492792e-05, + "loss": 0.2572, + "step": 11965, + "teacher_loss": 0.22513779997825623 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.39715510606765747, + "learning_rate": 2.5825900359163623e-05, + "loss": 0.2804, + "step": 11966, + "teacher_loss": 0.2674432396888733 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.3988390862941742, + "learning_rate": 2.5824328125629016e-05, + "loss": 0.2931, + "step": 11967, + "teacher_loss": 0.28136301040649414 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.42058253288269043, + "learning_rate": 2.5822755643925014e-05, + "loss": 0.3378, + "step": 11968, + "teacher_loss": 0.32863694429397583 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.40050947666168213, + "learning_rate": 2.582118291408767e-05, + "loss": 0.2073, + "step": 11969, + "teacher_loss": 0.1858624517917633 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.3080638647079468, + "learning_rate": 2.5819609936153052e-05, + "loss": 0.1945, + "step": 11970, + "teacher_loss": 0.18190079927444458 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.30709108710289, + "learning_rate": 2.5818036710157218e-05, + "loss": 0.2327, + "step": 11971, + "teacher_loss": 0.22445183992385864 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.5737199783325195, + "learning_rate": 2.5816463236136236e-05, + "loss": 0.3478, + "step": 11972, + "teacher_loss": 0.3226962387561798 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.18862685561180115, + "learning_rate": 2.581488951412618e-05, + "loss": 0.2282, + "step": 11973, + "teacher_loss": 0.2325660139322281 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.415749728679657, + "learning_rate": 2.581331554416313e-05, + "loss": 0.3714, + "step": 11974, + "teacher_loss": 0.3664560914039612 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.4448944926261902, + "learning_rate": 2.5811741326283177e-05, + "loss": 0.2499, + "step": 11975, + "teacher_loss": 0.2282048463821411 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.34253832697868347, + "learning_rate": 2.5810166860522407e-05, + "loss": 0.246, + "step": 11976, + "teacher_loss": 0.23528921604156494 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.6029420495033264, + "learning_rate": 2.580859214691693e-05, + "loss": 0.2501, + "step": 11977, + "teacher_loss": 0.21086959540843964 + }, + { + "compression_loss": 0.0, + "epoch": 2.16, + "label_loss": 0.3937985301017761, + "learning_rate": 2.5807017185502833e-05, + "loss": 0.2822, + "step": 11978, + "teacher_loss": 0.2698172330856323 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.8012281656265259, + "learning_rate": 2.5805441976316234e-05, + "loss": 0.3212, + "step": 11979, + "teacher_loss": 0.2678791880607605 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.30180203914642334, + "learning_rate": 2.580386651939325e-05, + "loss": 0.188, + "step": 11980, + "teacher_loss": 0.17537912726402283 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.3701208233833313, + "learning_rate": 2.5802290814769996e-05, + "loss": 0.239, + "step": 11981, + "teacher_loss": 0.22443421185016632 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.2737005949020386, + "learning_rate": 2.58007148624826e-05, + "loss": 0.3264, + "step": 11982, + "teacher_loss": 0.3322793245315552 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.9327365756034851, + "learning_rate": 2.57991386625672e-05, + "loss": 0.3687, + "step": 11983, + "teacher_loss": 0.30606523156166077 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.27122196555137634, + "learning_rate": 2.579756221505992e-05, + "loss": 0.2085, + "step": 11984, + "teacher_loss": 0.20148402452468872 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.27522778511047363, + "learning_rate": 2.5795985519996915e-05, + "loss": 0.2656, + "step": 11985, + "teacher_loss": 0.26449668407440186 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.3158515393733978, + "learning_rate": 2.5794408577414334e-05, + "loss": 0.2554, + "step": 11986, + "teacher_loss": 0.2487136721611023 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.3624435067176819, + "learning_rate": 2.5792831387348322e-05, + "loss": 0.3121, + "step": 11987, + "teacher_loss": 0.30649369955062866 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.16503341495990753, + "learning_rate": 2.5791253949835045e-05, + "loss": 0.1692, + "step": 11988, + "teacher_loss": 0.16961508989334106 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.58681321144104, + "learning_rate": 2.5789676264910668e-05, + "loss": 0.2586, + "step": 11989, + "teacher_loss": 0.22211971879005432 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.7148056030273438, + "learning_rate": 2.578809833261137e-05, + "loss": 0.3244, + "step": 11990, + "teacher_loss": 0.28103333711624146 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.3362712860107422, + "learning_rate": 2.5786520152973316e-05, + "loss": 0.3379, + "step": 11991, + "teacher_loss": 0.338096559047699 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.41751572489738464, + "learning_rate": 2.5784941726032695e-05, + "loss": 0.2056, + "step": 11992, + "teacher_loss": 0.1820676475763321 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.7120903730392456, + "learning_rate": 2.5783363051825694e-05, + "loss": 0.6266, + "step": 11993, + "teacher_loss": 0.617125391960144 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.2720344662666321, + "learning_rate": 2.5781784130388512e-05, + "loss": 0.2203, + "step": 11994, + "teacher_loss": 0.214506596326828 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.2760605812072754, + "learning_rate": 2.5780204961757345e-05, + "loss": 0.2038, + "step": 11995, + "teacher_loss": 0.1957758665084839 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.4183105528354645, + "learning_rate": 2.5778625545968397e-05, + "loss": 0.2128, + "step": 11996, + "teacher_loss": 0.18993742763996124 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.48098820447921753, + "learning_rate": 2.5777045883057882e-05, + "loss": 0.268, + "step": 11997, + "teacher_loss": 0.24437211453914642 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.6892327070236206, + "learning_rate": 2.5775465973062016e-05, + "loss": 0.2954, + "step": 11998, + "teacher_loss": 0.2516239881515503 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.509902834892273, + "learning_rate": 2.577388581601702e-05, + "loss": 0.2216, + "step": 11999, + "teacher_loss": 0.18952716886997223 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.5059928894042969, + "learning_rate": 2.577230541195913e-05, + "loss": 0.2591, + "step": 12000, + "teacher_loss": 0.2316424399614334 + }, + { + "epoch": 2.17, + "eval_exact_match": 79.47019867549669, + "eval_f1": 87.12672142557732, + "step": 12000 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.34276318550109863, + "learning_rate": 2.577072476092457e-05, + "loss": 0.342, + "step": 12001, + "teacher_loss": 0.34192001819610596 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.6022912263870239, + "learning_rate": 2.576914386294958e-05, + "loss": 0.3775, + "step": 12002, + "teacher_loss": 0.3525382876396179 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.10767114162445068, + "learning_rate": 2.5767562718070416e-05, + "loss": 0.1679, + "step": 12003, + "teacher_loss": 0.17463470995426178 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.3117823600769043, + "learning_rate": 2.5765981326323317e-05, + "loss": 0.3082, + "step": 12004, + "teacher_loss": 0.30777716636657715 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.22859103977680206, + "learning_rate": 2.5764399687744543e-05, + "loss": 0.2343, + "step": 12005, + "teacher_loss": 0.23494787514209747 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.45248425006866455, + "learning_rate": 2.5762817802370357e-05, + "loss": 0.2488, + "step": 12006, + "teacher_loss": 0.2261640429496765 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.4681066870689392, + "learning_rate": 2.5761235670237034e-05, + "loss": 0.2143, + "step": 12007, + "teacher_loss": 0.18613725900650024 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.624732494354248, + "learning_rate": 2.5759653291380833e-05, + "loss": 0.3549, + "step": 12008, + "teacher_loss": 0.32493099570274353 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.3321205675601959, + "learning_rate": 2.575807066583805e-05, + "loss": 0.2587, + "step": 12009, + "teacher_loss": 0.2505359947681427 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.36879605054855347, + "learning_rate": 2.5756487793644953e-05, + "loss": 0.2633, + "step": 12010, + "teacher_loss": 0.2516269087791443 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.43571197986602783, + "learning_rate": 2.5754904674837845e-05, + "loss": 0.2555, + "step": 12011, + "teacher_loss": 0.2354586273431778 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.18305779993534088, + "learning_rate": 2.5753321309453015e-05, + "loss": 0.2356, + "step": 12012, + "teacher_loss": 0.2414005994796753 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.4291021525859833, + "learning_rate": 2.575173769752677e-05, + "loss": 0.2353, + "step": 12013, + "teacher_loss": 0.21376332640647888 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.23948828876018524, + "learning_rate": 2.5750153839095413e-05, + "loss": 0.1863, + "step": 12014, + "teacher_loss": 0.1803765594959259 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.2899554371833801, + "learning_rate": 2.574856973419526e-05, + "loss": 0.272, + "step": 12015, + "teacher_loss": 0.26999813318252563 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.4110201597213745, + "learning_rate": 2.5746985382862628e-05, + "loss": 0.3345, + "step": 12016, + "teacher_loss": 0.3260200619697571 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.341378390789032, + "learning_rate": 2.5745400785133842e-05, + "loss": 0.2767, + "step": 12017, + "teacher_loss": 0.26946961879730225 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.7312158346176147, + "learning_rate": 2.5743815941045236e-05, + "loss": 0.7181, + "step": 12018, + "teacher_loss": 0.7166070938110352 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.26973846554756165, + "learning_rate": 2.574223085063314e-05, + "loss": 0.2004, + "step": 12019, + "teacher_loss": 0.19272738695144653 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.4913783073425293, + "learning_rate": 2.5740645513933895e-05, + "loss": 0.2782, + "step": 12020, + "teacher_loss": 0.25453346967697144 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.5462285876274109, + "learning_rate": 2.5739059930983853e-05, + "loss": 0.2697, + "step": 12021, + "teacher_loss": 0.23893225193023682 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.41651853919029236, + "learning_rate": 2.573747410181937e-05, + "loss": 0.3523, + "step": 12022, + "teacher_loss": 0.3452187180519104 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.9229111671447754, + "learning_rate": 2.5735888026476785e-05, + "loss": 0.3941, + "step": 12023, + "teacher_loss": 0.33535003662109375 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.5272068977355957, + "learning_rate": 2.5734301704992486e-05, + "loss": 0.3146, + "step": 12024, + "teacher_loss": 0.2909737229347229 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.4346199631690979, + "learning_rate": 2.5732715137402828e-05, + "loss": 0.274, + "step": 12025, + "teacher_loss": 0.256185919046402 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.3124018609523773, + "learning_rate": 2.573112832374419e-05, + "loss": 0.239, + "step": 12026, + "teacher_loss": 0.23080243170261383 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.6257033348083496, + "learning_rate": 2.5729541264052957e-05, + "loss": 0.3644, + "step": 12027, + "teacher_loss": 0.3354036808013916 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.9220696687698364, + "learning_rate": 2.5727953958365507e-05, + "loss": 0.3085, + "step": 12028, + "teacher_loss": 0.24028854072093964 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.5452176332473755, + "learning_rate": 2.5726366406718237e-05, + "loss": 0.3183, + "step": 12029, + "teacher_loss": 0.2931378483772278 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.5197912454605103, + "learning_rate": 2.5724778609147547e-05, + "loss": 0.3316, + "step": 12030, + "teacher_loss": 0.3106488883495331 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.5562528371810913, + "learning_rate": 2.5723190565689833e-05, + "loss": 0.3332, + "step": 12031, + "teacher_loss": 0.30836087465286255 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.3790109157562256, + "learning_rate": 2.5721602276381518e-05, + "loss": 0.3352, + "step": 12032, + "teacher_loss": 0.3302847146987915 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.36089059710502625, + "learning_rate": 2.5720013741259e-05, + "loss": 0.3289, + "step": 12033, + "teacher_loss": 0.3252910077571869 + }, + { + "compression_loss": 0.0, + "epoch": 2.17, + "label_loss": 0.7168359756469727, + "learning_rate": 2.571842496035871e-05, + "loss": 0.362, + "step": 12034, + "teacher_loss": 0.3225597143173218 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.3117433190345764, + "learning_rate": 2.5716835933717072e-05, + "loss": 0.1797, + "step": 12035, + "teacher_loss": 0.16502366960048676 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.6041591763496399, + "learning_rate": 2.5715246661370515e-05, + "loss": 0.4696, + "step": 12036, + "teacher_loss": 0.4546201825141907 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.3809165954589844, + "learning_rate": 2.571365714335548e-05, + "loss": 0.3571, + "step": 12037, + "teacher_loss": 0.3544117212295532 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.2490965873003006, + "learning_rate": 2.5712067379708404e-05, + "loss": 0.1948, + "step": 12038, + "teacher_loss": 0.18877166509628296 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.7868797183036804, + "learning_rate": 2.5710477370465747e-05, + "loss": 0.2954, + "step": 12039, + "teacher_loss": 0.24081027507781982 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.9049237370491028, + "learning_rate": 2.570888711566395e-05, + "loss": 0.2798, + "step": 12040, + "teacher_loss": 0.21031169593334198 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.4199647307395935, + "learning_rate": 2.570729661533948e-05, + "loss": 0.2221, + "step": 12041, + "teacher_loss": 0.20015479624271393 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.3062474727630615, + "learning_rate": 2.57057058695288e-05, + "loss": 0.1614, + "step": 12042, + "teacher_loss": 0.14530816674232483 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.4632222652435303, + "learning_rate": 2.570411487826838e-05, + "loss": 0.3317, + "step": 12043, + "teacher_loss": 0.3170440196990967 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.6057695746421814, + "learning_rate": 2.57025236415947e-05, + "loss": 0.3649, + "step": 12044, + "teacher_loss": 0.3381814956665039 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.7713839411735535, + "learning_rate": 2.570093215954424e-05, + "loss": 0.4217, + "step": 12045, + "teacher_loss": 0.38281798362731934 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.48428618907928467, + "learning_rate": 2.5699340432153495e-05, + "loss": 0.2533, + "step": 12046, + "teacher_loss": 0.2276536226272583 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.4086078703403473, + "learning_rate": 2.5697748459458945e-05, + "loss": 0.1828, + "step": 12047, + "teacher_loss": 0.15768729150295258 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.20795422792434692, + "learning_rate": 2.5696156241497095e-05, + "loss": 0.2027, + "step": 12048, + "teacher_loss": 0.20213884115219116 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.8001163005828857, + "learning_rate": 2.5694563778304455e-05, + "loss": 0.3824, + "step": 12049, + "teacher_loss": 0.33596912026405334 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.8652982115745544, + "learning_rate": 2.5692971069917532e-05, + "loss": 0.5746, + "step": 12050, + "teacher_loss": 0.5423203110694885 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.48111557960510254, + "learning_rate": 2.5691378116372843e-05, + "loss": 0.2543, + "step": 12051, + "teacher_loss": 0.2291422337293625 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.496149480342865, + "learning_rate": 2.5689784917706905e-05, + "loss": 0.2703, + "step": 12052, + "teacher_loss": 0.24520540237426758 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.7141072750091553, + "learning_rate": 2.5688191473956247e-05, + "loss": 0.2283, + "step": 12053, + "teacher_loss": 0.17433977127075195 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.544466495513916, + "learning_rate": 2.5686597785157405e-05, + "loss": 0.3349, + "step": 12054, + "teacher_loss": 0.311573326587677 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.5184895396232605, + "learning_rate": 2.5685003851346917e-05, + "loss": 0.3071, + "step": 12055, + "teacher_loss": 0.2836421728134155 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.20427894592285156, + "learning_rate": 2.5683409672561328e-05, + "loss": 0.2829, + "step": 12056, + "teacher_loss": 0.29160743951797485 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 1.2334232330322266, + "learning_rate": 2.568181524883718e-05, + "loss": 0.347, + "step": 12057, + "teacher_loss": 0.2484818696975708 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.6628396511077881, + "learning_rate": 2.568022058021104e-05, + "loss": 0.4199, + "step": 12058, + "teacher_loss": 0.3928550183773041 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.5072532296180725, + "learning_rate": 2.5678625666719457e-05, + "loss": 0.4238, + "step": 12059, + "teacher_loss": 0.4145410656929016 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.21382319927215576, + "learning_rate": 2.567703050839901e-05, + "loss": 0.2293, + "step": 12060, + "teacher_loss": 0.23103821277618408 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.21607086062431335, + "learning_rate": 2.567543510528626e-05, + "loss": 0.1608, + "step": 12061, + "teacher_loss": 0.15470048785209656 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.5792874097824097, + "learning_rate": 2.5673839457417793e-05, + "loss": 0.3208, + "step": 12062, + "teacher_loss": 0.2920577824115753 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.4855174124240875, + "learning_rate": 2.5672243564830188e-05, + "loss": 0.2983, + "step": 12063, + "teacher_loss": 0.27746179699897766 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.43412184715270996, + "learning_rate": 2.5670647427560036e-05, + "loss": 0.2588, + "step": 12064, + "teacher_loss": 0.23928947746753693 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.33610790967941284, + "learning_rate": 2.566905104564393e-05, + "loss": 0.2536, + "step": 12065, + "teacher_loss": 0.2444775402545929 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.17196375131607056, + "learning_rate": 2.5667454419118467e-05, + "loss": 0.2121, + "step": 12066, + "teacher_loss": 0.21656128764152527 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.45180654525756836, + "learning_rate": 2.566585754802026e-05, + "loss": 0.2003, + "step": 12067, + "teacher_loss": 0.1723524034023285 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.5299785137176514, + "learning_rate": 2.5664260432385916e-05, + "loss": 0.3177, + "step": 12068, + "teacher_loss": 0.29414108395576477 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.3738633990287781, + "learning_rate": 2.5662663072252056e-05, + "loss": 0.1975, + "step": 12069, + "teacher_loss": 0.1778697371482849 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.3002810478210449, + "learning_rate": 2.5661065467655298e-05, + "loss": 0.1912, + "step": 12070, + "teacher_loss": 0.1791335493326187 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.14524048566818237, + "learning_rate": 2.5659467618632275e-05, + "loss": 0.1556, + "step": 12071, + "teacher_loss": 0.1567300260066986 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.31826430559158325, + "learning_rate": 2.5657869525219617e-05, + "loss": 0.2144, + "step": 12072, + "teacher_loss": 0.20284314453601837 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.3811996579170227, + "learning_rate": 2.5656271187453962e-05, + "loss": 0.2823, + "step": 12073, + "teacher_loss": 0.2713657021522522 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.8714183568954468, + "learning_rate": 2.565467260537196e-05, + "loss": 0.4472, + "step": 12074, + "teacher_loss": 0.4000872075557709 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.29336345195770264, + "learning_rate": 2.5653073779010262e-05, + "loss": 0.208, + "step": 12075, + "teacher_loss": 0.19855134189128876 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.33302801847457886, + "learning_rate": 2.5651474708405518e-05, + "loss": 0.1924, + "step": 12076, + "teacher_loss": 0.1767941415309906 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.13110433518886566, + "learning_rate": 2.5649875393594396e-05, + "loss": 0.2092, + "step": 12077, + "teacher_loss": 0.21785868704319 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.21930095553398132, + "learning_rate": 2.5648275834613562e-05, + "loss": 0.2903, + "step": 12078, + "teacher_loss": 0.2981743812561035 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.4509267210960388, + "learning_rate": 2.564667603149969e-05, + "loss": 0.2, + "step": 12079, + "teacher_loss": 0.1720849871635437 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.38413649797439575, + "learning_rate": 2.5645075984289447e-05, + "loss": 0.2386, + "step": 12080, + "teacher_loss": 0.2224786877632141 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.4200987219810486, + "learning_rate": 2.5643475693019536e-05, + "loss": 0.2892, + "step": 12081, + "teacher_loss": 0.2747001647949219 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.4053970277309418, + "learning_rate": 2.5641875157726636e-05, + "loss": 0.285, + "step": 12082, + "teacher_loss": 0.2716303765773773 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.386601984500885, + "learning_rate": 2.5640274378447444e-05, + "loss": 0.2419, + "step": 12083, + "teacher_loss": 0.22576940059661865 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.3250811696052551, + "learning_rate": 2.5638673355218664e-05, + "loss": 0.2471, + "step": 12084, + "teacher_loss": 0.23847800493240356 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.11539055407047272, + "learning_rate": 2.5637072088076995e-05, + "loss": 0.1532, + "step": 12085, + "teacher_loss": 0.15744438767433167 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.4190255105495453, + "learning_rate": 2.5635470577059163e-05, + "loss": 0.2857, + "step": 12086, + "teacher_loss": 0.27085593342781067 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.9595630764961243, + "learning_rate": 2.5633868822201873e-05, + "loss": 0.3785, + "step": 12087, + "teacher_loss": 0.3139900267124176 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.28952646255493164, + "learning_rate": 2.563226682354185e-05, + "loss": 0.2066, + "step": 12088, + "teacher_loss": 0.19733980298042297 + }, + { + "compression_loss": 0.0, + "epoch": 2.18, + "label_loss": 0.6147408485412598, + "learning_rate": 2.5630664581115827e-05, + "loss": 0.3599, + "step": 12089, + "teacher_loss": 0.33156412839889526 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.794202983379364, + "learning_rate": 2.5629062094960543e-05, + "loss": 0.4562, + "step": 12090, + "teacher_loss": 0.4186851382255554 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 1.1200573444366455, + "learning_rate": 2.562745936511273e-05, + "loss": 0.4182, + "step": 12091, + "teacher_loss": 0.3402583599090576 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.5985281467437744, + "learning_rate": 2.562585639160913e-05, + "loss": 0.339, + "step": 12092, + "teacher_loss": 0.3101271390914917 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.2703246474266052, + "learning_rate": 2.5624253174486513e-05, + "loss": 0.1877, + "step": 12093, + "teacher_loss": 0.1785367876291275 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.16954196989536285, + "learning_rate": 2.5622649713781615e-05, + "loss": 0.1536, + "step": 12094, + "teacher_loss": 0.15188385546207428 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.3344150185585022, + "learning_rate": 2.5621046009531208e-05, + "loss": 0.1925, + "step": 12095, + "teacher_loss": 0.17671418190002441 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.3000280261039734, + "learning_rate": 2.5619442061772065e-05, + "loss": 0.251, + "step": 12096, + "teacher_loss": 0.24557159841060638 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.5467036962509155, + "learning_rate": 2.5617837870540946e-05, + "loss": 0.2217, + "step": 12097, + "teacher_loss": 0.185600146651268 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.4630429744720459, + "learning_rate": 2.5616233435874648e-05, + "loss": 0.2249, + "step": 12098, + "teacher_loss": 0.19844374060630798 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.35714492201805115, + "learning_rate": 2.561462875780994e-05, + "loss": 0.2282, + "step": 12099, + "teacher_loss": 0.21391957998275757 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.8228484392166138, + "learning_rate": 2.561302383638362e-05, + "loss": 0.4099, + "step": 12100, + "teacher_loss": 0.36405158042907715 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.3310806155204773, + "learning_rate": 2.561141867163248e-05, + "loss": 0.1932, + "step": 12101, + "teacher_loss": 0.17791973054409027 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.8997209072113037, + "learning_rate": 2.5609813263593332e-05, + "loss": 0.2585, + "step": 12102, + "teacher_loss": 0.1873003989458084 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.48904651403427124, + "learning_rate": 2.5608207612302966e-05, + "loss": 0.2845, + "step": 12103, + "teacher_loss": 0.2617371082305908 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.40799176692962646, + "learning_rate": 2.5606601717798212e-05, + "loss": 0.2837, + "step": 12104, + "teacher_loss": 0.2698557674884796 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.45061129331588745, + "learning_rate": 2.560499558011588e-05, + "loss": 0.2876, + "step": 12105, + "teacher_loss": 0.2694329023361206 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.7518491744995117, + "learning_rate": 2.560338919929279e-05, + "loss": 0.3946, + "step": 12106, + "teacher_loss": 0.3548893332481384 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.5422612428665161, + "learning_rate": 2.560178257536578e-05, + "loss": 0.3216, + "step": 12107, + "teacher_loss": 0.29713425040245056 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.5565462708473206, + "learning_rate": 2.5600175708371686e-05, + "loss": 0.2897, + "step": 12108, + "teacher_loss": 0.2600501775741577 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 1.0550949573516846, + "learning_rate": 2.559856859834734e-05, + "loss": 0.3515, + "step": 12109, + "teacher_loss": 0.27333563566207886 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.9548097848892212, + "learning_rate": 2.559696124532959e-05, + "loss": 0.4254, + "step": 12110, + "teacher_loss": 0.3665444552898407 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.5140724778175354, + "learning_rate": 2.5595353649355292e-05, + "loss": 0.2482, + "step": 12111, + "teacher_loss": 0.21863387525081635 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.772221565246582, + "learning_rate": 2.5593745810461302e-05, + "loss": 0.5363, + "step": 12112, + "teacher_loss": 0.5100698471069336 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.3502851724624634, + "learning_rate": 2.5592137728684477e-05, + "loss": 0.3064, + "step": 12113, + "teacher_loss": 0.3014754354953766 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.28992676734924316, + "learning_rate": 2.55905294040617e-05, + "loss": 0.2912, + "step": 12114, + "teacher_loss": 0.29134488105773926 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.15178117156028748, + "learning_rate": 2.5588920836629827e-05, + "loss": 0.182, + "step": 12115, + "teacher_loss": 0.1853661835193634 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.15197540819644928, + "learning_rate": 2.5587312026425752e-05, + "loss": 0.154, + "step": 12116, + "teacher_loss": 0.15419799089431763 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.34311234951019287, + "learning_rate": 2.5585702973486354e-05, + "loss": 0.3549, + "step": 12117, + "teacher_loss": 0.35615992546081543 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.3055446743965149, + "learning_rate": 2.558409367784852e-05, + "loss": 0.1864, + "step": 12118, + "teacher_loss": 0.17313650250434875 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.40892112255096436, + "learning_rate": 2.5582484139549156e-05, + "loss": 0.2881, + "step": 12119, + "teacher_loss": 0.27466607093811035 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.4413086175918579, + "learning_rate": 2.5580874358625157e-05, + "loss": 0.2409, + "step": 12120, + "teacher_loss": 0.21865665912628174 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.17831552028656006, + "learning_rate": 2.5579264335113433e-05, + "loss": 0.1819, + "step": 12121, + "teacher_loss": 0.18225814402103424 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.16371086239814758, + "learning_rate": 2.5577654069050897e-05, + "loss": 0.1663, + "step": 12122, + "teacher_loss": 0.1666153073310852 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.33577772974967957, + "learning_rate": 2.5576043560474462e-05, + "loss": 0.2234, + "step": 12123, + "teacher_loss": 0.21088311076164246 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.5169893503189087, + "learning_rate": 2.5574432809421057e-05, + "loss": 0.3836, + "step": 12124, + "teacher_loss": 0.3687896132469177 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.5519869327545166, + "learning_rate": 2.5572821815927615e-05, + "loss": 0.3357, + "step": 12125, + "teacher_loss": 0.31165921688079834 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.33798593282699585, + "learning_rate": 2.5571210580031063e-05, + "loss": 0.2455, + "step": 12126, + "teacher_loss": 0.2352066934108734 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.7340861558914185, + "learning_rate": 2.556959910176835e-05, + "loss": 0.2699, + "step": 12127, + "teacher_loss": 0.21831554174423218 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.39911675453186035, + "learning_rate": 2.556798738117642e-05, + "loss": 0.27, + "step": 12128, + "teacher_loss": 0.2556079924106598 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.46826624870300293, + "learning_rate": 2.5566375418292223e-05, + "loss": 0.2859, + "step": 12129, + "teacher_loss": 0.26564115285873413 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.4208621382713318, + "learning_rate": 2.5564763213152716e-05, + "loss": 0.295, + "step": 12130, + "teacher_loss": 0.2809777557849884 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.7827037572860718, + "learning_rate": 2.5563150765794864e-05, + "loss": 0.3718, + "step": 12131, + "teacher_loss": 0.32617440819740295 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.2826290726661682, + "learning_rate": 2.5561538076255635e-05, + "loss": 0.2284, + "step": 12132, + "teacher_loss": 0.22239045798778534 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.379263699054718, + "learning_rate": 2.5559925144572008e-05, + "loss": 0.3705, + "step": 12133, + "teacher_loss": 0.36947742104530334 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.4460013508796692, + "learning_rate": 2.555831197078095e-05, + "loss": 0.2506, + "step": 12134, + "teacher_loss": 0.22890590131282806 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.2931884229183197, + "learning_rate": 2.5556698554919453e-05, + "loss": 0.2403, + "step": 12135, + "teacher_loss": 0.2344590574502945 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.3257597088813782, + "learning_rate": 2.5555084897024515e-05, + "loss": 0.2206, + "step": 12136, + "teacher_loss": 0.20887988805770874 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.32765859365463257, + "learning_rate": 2.5553470997133125e-05, + "loss": 0.2559, + "step": 12137, + "teacher_loss": 0.2478812038898468 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.6798316240310669, + "learning_rate": 2.5551856855282284e-05, + "loss": 0.2863, + "step": 12138, + "teacher_loss": 0.2425793558359146 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.4339195787906647, + "learning_rate": 2.5550242471509e-05, + "loss": 0.3771, + "step": 12139, + "teacher_loss": 0.37080326676368713 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.20960810780525208, + "learning_rate": 2.554862784585029e-05, + "loss": 0.222, + "step": 12140, + "teacher_loss": 0.22340822219848633 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.19935759902000427, + "learning_rate": 2.554701297834317e-05, + "loss": 0.2393, + "step": 12141, + "teacher_loss": 0.24370066821575165 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.6644387245178223, + "learning_rate": 2.5545397869024666e-05, + "loss": 0.3751, + "step": 12142, + "teacher_loss": 0.3429903984069824 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.44039052724838257, + "learning_rate": 2.5543782517931802e-05, + "loss": 0.4038, + "step": 12143, + "teacher_loss": 0.3997552692890167 + }, + { + "compression_loss": 0.0, + "epoch": 2.19, + "label_loss": 0.39276933670043945, + "learning_rate": 2.554216692510162e-05, + "loss": 0.2379, + "step": 12144, + "teacher_loss": 0.22066599130630493 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.34065189957618713, + "learning_rate": 2.5540551090571153e-05, + "loss": 0.2422, + "step": 12145, + "teacher_loss": 0.2312653511762619 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.5802285671234131, + "learning_rate": 2.5538935014377452e-05, + "loss": 0.2676, + "step": 12146, + "teacher_loss": 0.2328624427318573 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.5580289363861084, + "learning_rate": 2.553731869655757e-05, + "loss": 0.3375, + "step": 12147, + "teacher_loss": 0.31304556131362915 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.6319280862808228, + "learning_rate": 2.5535702137148562e-05, + "loss": 0.419, + "step": 12148, + "teacher_loss": 0.3952864110469818 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.2756175100803375, + "learning_rate": 2.5534085336187494e-05, + "loss": 0.1954, + "step": 12149, + "teacher_loss": 0.1864548623561859 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.6401157379150391, + "learning_rate": 2.5532468293711435e-05, + "loss": 0.2689, + "step": 12150, + "teacher_loss": 0.22761327028274536 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.4783114492893219, + "learning_rate": 2.553085100975745e-05, + "loss": 0.3794, + "step": 12151, + "teacher_loss": 0.36843979358673096 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.5988929867744446, + "learning_rate": 2.5529233484362628e-05, + "loss": 0.2333, + "step": 12152, + "teacher_loss": 0.1926494836807251 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.9094337224960327, + "learning_rate": 2.5527615717564046e-05, + "loss": 0.3233, + "step": 12153, + "teacher_loss": 0.25820791721343994 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.17630723118782043, + "learning_rate": 2.5525997709398802e-05, + "loss": 0.1679, + "step": 12154, + "teacher_loss": 0.1669875979423523 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.5125003457069397, + "learning_rate": 2.552437945990399e-05, + "loss": 0.2524, + "step": 12155, + "teacher_loss": 0.2234780490398407 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.35562562942504883, + "learning_rate": 2.5522760969116707e-05, + "loss": 0.3187, + "step": 12156, + "teacher_loss": 0.31456565856933594 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.4955211877822876, + "learning_rate": 2.5521142237074066e-05, + "loss": 0.3079, + "step": 12157, + "teacher_loss": 0.2870635986328125 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.6698476076126099, + "learning_rate": 2.551952326381318e-05, + "loss": 0.2409, + "step": 12158, + "teacher_loss": 0.19325101375579834 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.19754254817962646, + "learning_rate": 2.5517904049371165e-05, + "loss": 0.1932, + "step": 12159, + "teacher_loss": 0.19276052713394165 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.38089612126350403, + "learning_rate": 2.551628459378514e-05, + "loss": 0.2733, + "step": 12160, + "teacher_loss": 0.26136380434036255 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.6516788601875305, + "learning_rate": 2.5514664897092242e-05, + "loss": 0.3308, + "step": 12161, + "teacher_loss": 0.2951948940753937 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.3119394779205322, + "learning_rate": 2.5513044959329602e-05, + "loss": 0.1577, + "step": 12162, + "teacher_loss": 0.1405426561832428 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.2910478115081787, + "learning_rate": 2.5511424780534363e-05, + "loss": 0.2653, + "step": 12163, + "teacher_loss": 0.26248979568481445 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.8668642044067383, + "learning_rate": 2.5509804360743663e-05, + "loss": 0.4213, + "step": 12164, + "teacher_loss": 0.3718433380126953 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.9940638542175293, + "learning_rate": 2.5508183699994663e-05, + "loss": 0.4573, + "step": 12165, + "teacher_loss": 0.39761459827423096 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.41105973720550537, + "learning_rate": 2.5506562798324514e-05, + "loss": 0.2394, + "step": 12166, + "teacher_loss": 0.2203579545021057 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 1.1164674758911133, + "learning_rate": 2.5504941655770383e-05, + "loss": 0.2955, + "step": 12167, + "teacher_loss": 0.20424652099609375 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.2605484127998352, + "learning_rate": 2.5503320272369433e-05, + "loss": 0.2504, + "step": 12168, + "teacher_loss": 0.24932171404361725 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.2466181516647339, + "learning_rate": 2.5501698648158842e-05, + "loss": 0.2516, + "step": 12169, + "teacher_loss": 0.2521243095397949 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.20357359945774078, + "learning_rate": 2.550007678317578e-05, + "loss": 0.1979, + "step": 12170, + "teacher_loss": 0.19724902510643005 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.594035804271698, + "learning_rate": 2.5498454677457447e-05, + "loss": 0.3659, + "step": 12171, + "teacher_loss": 0.34059953689575195 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.39862537384033203, + "learning_rate": 2.549683233104102e-05, + "loss": 0.1825, + "step": 12172, + "teacher_loss": 0.15850132703781128 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.3801894187927246, + "learning_rate": 2.54952097439637e-05, + "loss": 0.3092, + "step": 12173, + "teacher_loss": 0.301271915435791 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.20888887345790863, + "learning_rate": 2.549358691626269e-05, + "loss": 0.1734, + "step": 12174, + "teacher_loss": 0.1695026457309723 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.6259768605232239, + "learning_rate": 2.5491963847975185e-05, + "loss": 0.3332, + "step": 12175, + "teacher_loss": 0.30065059661865234 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.40156078338623047, + "learning_rate": 2.549034053913841e-05, + "loss": 0.3423, + "step": 12176, + "teacher_loss": 0.3357672393321991 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.29409000277519226, + "learning_rate": 2.548871698978958e-05, + "loss": 0.2437, + "step": 12177, + "teacher_loss": 0.2380896657705307 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.502200186252594, + "learning_rate": 2.548709319996591e-05, + "loss": 0.2285, + "step": 12178, + "teacher_loss": 0.19810648262500763 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.2712060511112213, + "learning_rate": 2.548546916970464e-05, + "loss": 0.1867, + "step": 12179, + "teacher_loss": 0.1773313283920288 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.5376166105270386, + "learning_rate": 2.5483844899043e-05, + "loss": 0.4442, + "step": 12180, + "teacher_loss": 0.43385449051856995 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.37323909997940063, + "learning_rate": 2.5482220388018227e-05, + "loss": 0.2298, + "step": 12181, + "teacher_loss": 0.21388836205005646 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.26472529768943787, + "learning_rate": 2.548059563666757e-05, + "loss": 0.1785, + "step": 12182, + "teacher_loss": 0.16889676451683044 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.7561064958572388, + "learning_rate": 2.5478970645028274e-05, + "loss": 0.4525, + "step": 12183, + "teacher_loss": 0.4187861382961273 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.369077205657959, + "learning_rate": 2.5477345413137597e-05, + "loss": 0.3052, + "step": 12184, + "teacher_loss": 0.29804956912994385 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.4446794390678406, + "learning_rate": 2.5475719941032807e-05, + "loss": 0.2919, + "step": 12185, + "teacher_loss": 0.27488404512405396 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.38614439964294434, + "learning_rate": 2.5474094228751165e-05, + "loss": 0.2017, + "step": 12186, + "teacher_loss": 0.1811678111553192 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.35596877336502075, + "learning_rate": 2.5472468276329947e-05, + "loss": 0.2945, + "step": 12187, + "teacher_loss": 0.2877189517021179 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.636809766292572, + "learning_rate": 2.5470842083806424e-05, + "loss": 0.2759, + "step": 12188, + "teacher_loss": 0.2358129769563675 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.3739003837108612, + "learning_rate": 2.546921565121789e-05, + "loss": 0.2272, + "step": 12189, + "teacher_loss": 0.2108791172504425 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.3810468316078186, + "learning_rate": 2.546758897860163e-05, + "loss": 0.2265, + "step": 12190, + "teacher_loss": 0.20934978127479553 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.5666791796684265, + "learning_rate": 2.5465962065994938e-05, + "loss": 0.2648, + "step": 12191, + "teacher_loss": 0.23131319880485535 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 1.2998859882354736, + "learning_rate": 2.5464334913435112e-05, + "loss": 0.3545, + "step": 12192, + "teacher_loss": 0.24944192171096802 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.6995748281478882, + "learning_rate": 2.546270752095946e-05, + "loss": 0.2771, + "step": 12193, + "teacher_loss": 0.23019832372665405 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.27509185671806335, + "learning_rate": 2.5461079888605296e-05, + "loss": 0.1971, + "step": 12194, + "teacher_loss": 0.18840250372886658 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.8868666887283325, + "learning_rate": 2.545945201640993e-05, + "loss": 0.3226, + "step": 12195, + "teacher_loss": 0.2598605155944824 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.6806921362876892, + "learning_rate": 2.5457823904410693e-05, + "loss": 0.3553, + "step": 12196, + "teacher_loss": 0.3191791772842407 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.5159366130828857, + "learning_rate": 2.5456195552644907e-05, + "loss": 0.3266, + "step": 12197, + "teacher_loss": 0.3055843412876129 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.22315362095832825, + "learning_rate": 2.5454566961149904e-05, + "loss": 0.2582, + "step": 12198, + "teacher_loss": 0.2620655298233032 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.6639368534088135, + "learning_rate": 2.545293812996303e-05, + "loss": 0.2666, + "step": 12199, + "teacher_loss": 0.2224433273077011 + }, + { + "compression_loss": 0.0, + "epoch": 2.2, + "label_loss": 0.3419730067253113, + "learning_rate": 2.545130905912162e-05, + "loss": 0.2959, + "step": 12200, + "teacher_loss": 0.2907707691192627 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.25674182176589966, + "learning_rate": 2.5449679748663027e-05, + "loss": 0.2321, + "step": 12201, + "teacher_loss": 0.2293972373008728 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.4763478636741638, + "learning_rate": 2.5448050198624606e-05, + "loss": 0.2808, + "step": 12202, + "teacher_loss": 0.25910478830337524 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.2921602725982666, + "learning_rate": 2.544642040904372e-05, + "loss": 0.2109, + "step": 12203, + "teacher_loss": 0.2018449902534485 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.30316439270973206, + "learning_rate": 2.544479037995774e-05, + "loss": 0.219, + "step": 12204, + "teacher_loss": 0.2096112072467804 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.18290674686431885, + "learning_rate": 2.5443160111404024e-05, + "loss": 0.1857, + "step": 12205, + "teacher_loss": 0.1859687864780426 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.578205943107605, + "learning_rate": 2.5441529603419963e-05, + "loss": 0.2439, + "step": 12206, + "teacher_loss": 0.20679515600204468 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.6500942707061768, + "learning_rate": 2.543989885604293e-05, + "loss": 0.3032, + "step": 12207, + "teacher_loss": 0.2647073268890381 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.7271655797958374, + "learning_rate": 2.5438267869310317e-05, + "loss": 0.3424, + "step": 12208, + "teacher_loss": 0.29963570833206177 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.2669234275817871, + "learning_rate": 2.5436636643259515e-05, + "loss": 0.2364, + "step": 12209, + "teacher_loss": 0.2330320179462433 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.30647045373916626, + "learning_rate": 2.543500517792793e-05, + "loss": 0.1983, + "step": 12210, + "teacher_loss": 0.18623371422290802 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.5310353636741638, + "learning_rate": 2.543337347335296e-05, + "loss": 0.2795, + "step": 12211, + "teacher_loss": 0.25157874822616577 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.8132265210151672, + "learning_rate": 2.5431741529572017e-05, + "loss": 0.4545, + "step": 12212, + "teacher_loss": 0.4146061837673187 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.35578739643096924, + "learning_rate": 2.5430109346622518e-05, + "loss": 0.232, + "step": 12213, + "teacher_loss": 0.21820631623268127 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.4285944402217865, + "learning_rate": 2.542847692454188e-05, + "loss": 0.2554, + "step": 12214, + "teacher_loss": 0.23618894815444946 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.3426033854484558, + "learning_rate": 2.5426844263367533e-05, + "loss": 0.3584, + "step": 12215, + "teacher_loss": 0.3601352274417877 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.6546006202697754, + "learning_rate": 2.542521136313691e-05, + "loss": 0.2752, + "step": 12216, + "teacher_loss": 0.23302070796489716 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.33751142024993896, + "learning_rate": 2.5423578223887445e-05, + "loss": 0.2075, + "step": 12217, + "teacher_loss": 0.193055659532547 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.2591434121131897, + "learning_rate": 2.5421944845656585e-05, + "loss": 0.2836, + "step": 12218, + "teacher_loss": 0.286365270614624 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.5391420722007751, + "learning_rate": 2.5420311228481775e-05, + "loss": 0.2913, + "step": 12219, + "teacher_loss": 0.26380959153175354 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.167050302028656, + "learning_rate": 2.541867737240047e-05, + "loss": 0.2161, + "step": 12220, + "teacher_loss": 0.2215423583984375 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.4708930253982544, + "learning_rate": 2.541704327745013e-05, + "loss": 0.2599, + "step": 12221, + "teacher_loss": 0.23643046617507935 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.9012323617935181, + "learning_rate": 2.541540894366822e-05, + "loss": 0.3429, + "step": 12222, + "teacher_loss": 0.28090861439704895 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.27898362278938293, + "learning_rate": 2.5413774371092208e-05, + "loss": 0.2377, + "step": 12223, + "teacher_loss": 0.2330804020166397 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.6240382790565491, + "learning_rate": 2.5412139559759575e-05, + "loss": 0.362, + "step": 12224, + "teacher_loss": 0.33287495374679565 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.6844403743743896, + "learning_rate": 2.5410504509707796e-05, + "loss": 0.3733, + "step": 12225, + "teacher_loss": 0.33878186345100403 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 1.0009214878082275, + "learning_rate": 2.5408869220974364e-05, + "loss": 0.7458, + "step": 12226, + "teacher_loss": 0.717411994934082 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.1303437352180481, + "learning_rate": 2.5407233693596767e-05, + "loss": 0.1787, + "step": 12227, + "teacher_loss": 0.18411031365394592 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.3079826235771179, + "learning_rate": 2.5405597927612504e-05, + "loss": 0.2932, + "step": 12228, + "teacher_loss": 0.291509747505188 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.19590017199516296, + "learning_rate": 2.5403961923059077e-05, + "loss": 0.1753, + "step": 12229, + "teacher_loss": 0.1729845106601715 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.20978929102420807, + "learning_rate": 2.5402325679973995e-05, + "loss": 0.2061, + "step": 12230, + "teacher_loss": 0.2056489884853363 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.322479784488678, + "learning_rate": 2.5400689198394776e-05, + "loss": 0.3062, + "step": 12231, + "teacher_loss": 0.30440637469291687 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.577194333076477, + "learning_rate": 2.5399052478358934e-05, + "loss": 0.2818, + "step": 12232, + "teacher_loss": 0.24897003173828125 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.9230172634124756, + "learning_rate": 2.5397415519903998e-05, + "loss": 0.4761, + "step": 12233, + "teacher_loss": 0.42638739943504333 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.5293896794319153, + "learning_rate": 2.53957783230675e-05, + "loss": 0.247, + "step": 12234, + "teacher_loss": 0.21563827991485596 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.3310241401195526, + "learning_rate": 2.5394140887886967e-05, + "loss": 0.2422, + "step": 12235, + "teacher_loss": 0.23234772682189941 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.29574957489967346, + "learning_rate": 2.539250321439995e-05, + "loss": 0.2906, + "step": 12236, + "teacher_loss": 0.2899932265281677 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.7096019983291626, + "learning_rate": 2.5390865302643993e-05, + "loss": 0.2405, + "step": 12237, + "teacher_loss": 0.18833017349243164 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.17499977350234985, + "learning_rate": 2.5389227152656646e-05, + "loss": 0.1554, + "step": 12238, + "teacher_loss": 0.15326133370399475 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.6122881174087524, + "learning_rate": 2.538758876447547e-05, + "loss": 0.3549, + "step": 12239, + "teacher_loss": 0.3263135552406311 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.2680577337741852, + "learning_rate": 2.538595013813803e-05, + "loss": 0.1792, + "step": 12240, + "teacher_loss": 0.16934826970100403 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.5762743949890137, + "learning_rate": 2.5384311273681885e-05, + "loss": 0.3814, + "step": 12241, + "teacher_loss": 0.3597896695137024 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.3890647888183594, + "learning_rate": 2.538267217114462e-05, + "loss": 0.2509, + "step": 12242, + "teacher_loss": 0.23554500937461853 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.47311797738075256, + "learning_rate": 2.538103283056382e-05, + "loss": 0.1978, + "step": 12243, + "teacher_loss": 0.16722692549228668 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.6457678079605103, + "learning_rate": 2.537939325197705e-05, + "loss": 0.2454, + "step": 12244, + "teacher_loss": 0.20094084739685059 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.4834545850753784, + "learning_rate": 2.5377753435421916e-05, + "loss": 0.231, + "step": 12245, + "teacher_loss": 0.20297545194625854 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.9597777128219604, + "learning_rate": 2.5376113380936003e-05, + "loss": 0.3049, + "step": 12246, + "teacher_loss": 0.23213037848472595 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.5074963569641113, + "learning_rate": 2.5374473088556927e-05, + "loss": 0.3856, + "step": 12247, + "teacher_loss": 0.3720594644546509 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.33791038393974304, + "learning_rate": 2.537283255832229e-05, + "loss": 0.235, + "step": 12248, + "teacher_loss": 0.22358958423137665 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.5721325874328613, + "learning_rate": 2.5371191790269692e-05, + "loss": 0.3622, + "step": 12249, + "teacher_loss": 0.3388659358024597 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.27837905287742615, + "learning_rate": 2.5369550784436767e-05, + "loss": 0.2977, + "step": 12250, + "teacher_loss": 0.29985642433166504 + }, + { + "epoch": 2.21, + "eval_exact_match": 79.29044465468307, + "eval_f1": 86.76596337668238, + "step": 12250 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.9296122789382935, + "learning_rate": 2.536790954086113e-05, + "loss": 0.3225, + "step": 12251, + "teacher_loss": 0.2550719976425171 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.2560519874095917, + "learning_rate": 2.536626805958041e-05, + "loss": 0.1957, + "step": 12252, + "teacher_loss": 0.1890290528535843 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.8779016733169556, + "learning_rate": 2.536462634063225e-05, + "loss": 0.2906, + "step": 12253, + "teacher_loss": 0.22538354992866516 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 1.0749298334121704, + "learning_rate": 2.5362984384054276e-05, + "loss": 0.9309, + "step": 12254, + "teacher_loss": 0.9148869514465332 + }, + { + "compression_loss": 0.0, + "epoch": 2.21, + "label_loss": 0.3358515799045563, + "learning_rate": 2.536134218988414e-05, + "loss": 0.1955, + "step": 12255, + "teacher_loss": 0.1799592226743698 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.5111252069473267, + "learning_rate": 2.53596997581595e-05, + "loss": 0.2038, + "step": 12256, + "teacher_loss": 0.1696714460849762 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.4583056569099426, + "learning_rate": 2.5358057088917998e-05, + "loss": 0.2966, + "step": 12257, + "teacher_loss": 0.27862897515296936 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.33745095133781433, + "learning_rate": 2.5356414182197304e-05, + "loss": 0.2372, + "step": 12258, + "teacher_loss": 0.22602951526641846 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 1.1009776592254639, + "learning_rate": 2.5354771038035083e-05, + "loss": 0.5399, + "step": 12259, + "teacher_loss": 0.4775271713733673 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.4117460250854492, + "learning_rate": 2.5353127656469006e-05, + "loss": 0.2163, + "step": 12260, + "teacher_loss": 0.19461394846439362 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.3260696232318878, + "learning_rate": 2.5351484037536752e-05, + "loss": 0.2238, + "step": 12261, + "teacher_loss": 0.21238702535629272 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.2323688566684723, + "learning_rate": 2.5349840181276006e-05, + "loss": 0.2343, + "step": 12262, + "teacher_loss": 0.2345256507396698 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.27624520659446716, + "learning_rate": 2.5348196087724453e-05, + "loss": 0.2016, + "step": 12263, + "teacher_loss": 0.19335487484931946 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.5712655782699585, + "learning_rate": 2.534655175691979e-05, + "loss": 0.223, + "step": 12264, + "teacher_loss": 0.18433691561222076 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.26065075397491455, + "learning_rate": 2.5344907188899715e-05, + "loss": 0.2413, + "step": 12265, + "teacher_loss": 0.23919284343719482 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.5144751071929932, + "learning_rate": 2.5343262383701935e-05, + "loss": 0.2358, + "step": 12266, + "teacher_loss": 0.20479834079742432 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.2731708884239197, + "learning_rate": 2.5341617341364162e-05, + "loss": 0.25, + "step": 12267, + "teacher_loss": 0.24739684164524078 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.9103981256484985, + "learning_rate": 2.5339972061924107e-05, + "loss": 0.3201, + "step": 12268, + "teacher_loss": 0.2545044720172882 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.4443921446800232, + "learning_rate": 2.5338326545419492e-05, + "loss": 0.2583, + "step": 12269, + "teacher_loss": 0.23766474425792694 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.908003568649292, + "learning_rate": 2.5336680791888046e-05, + "loss": 0.443, + "step": 12270, + "teacher_loss": 0.39138558506965637 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.5844656229019165, + "learning_rate": 2.5335034801367504e-05, + "loss": 0.3635, + "step": 12271, + "teacher_loss": 0.3389894366264343 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.5441825985908508, + "learning_rate": 2.533338857389559e-05, + "loss": 0.3986, + "step": 12272, + "teacher_loss": 0.38241827487945557 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.696174144744873, + "learning_rate": 2.533174210951007e-05, + "loss": 0.755, + "step": 12273, + "teacher_loss": 0.7615283727645874 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.4746894836425781, + "learning_rate": 2.533009540824867e-05, + "loss": 0.2785, + "step": 12274, + "teacher_loss": 0.2566780149936676 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.37983238697052, + "learning_rate": 2.5328448470149155e-05, + "loss": 0.2736, + "step": 12275, + "teacher_loss": 0.26174378395080566 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.38325080275535583, + "learning_rate": 2.5326801295249287e-05, + "loss": 0.2921, + "step": 12276, + "teacher_loss": 0.28199535608291626 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.4038093090057373, + "learning_rate": 2.532515388358682e-05, + "loss": 0.3282, + "step": 12277, + "teacher_loss": 0.3197876214981079 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.477444052696228, + "learning_rate": 2.532350623519954e-05, + "loss": 0.2698, + "step": 12278, + "teacher_loss": 0.2467428743839264 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 1.0434633493423462, + "learning_rate": 2.5321858350125206e-05, + "loss": 0.6004, + "step": 12279, + "teacher_loss": 0.5511615872383118 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.6344360113143921, + "learning_rate": 2.532021022840161e-05, + "loss": 0.3336, + "step": 12280, + "teacher_loss": 0.3002076745033264 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.3277432322502136, + "learning_rate": 2.5318561870066536e-05, + "loss": 0.3299, + "step": 12281, + "teacher_loss": 0.3301534652709961 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.23082366585731506, + "learning_rate": 2.5316913275157772e-05, + "loss": 0.2516, + "step": 12282, + "teacher_loss": 0.25393566489219666 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.30696696043014526, + "learning_rate": 2.531526444371312e-05, + "loss": 0.2247, + "step": 12283, + "teacher_loss": 0.21550793945789337 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.3932054936885834, + "learning_rate": 2.531361537577038e-05, + "loss": 0.2081, + "step": 12284, + "teacher_loss": 0.18748819828033447 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.3584969639778137, + "learning_rate": 2.5311966071367362e-05, + "loss": 0.2614, + "step": 12285, + "teacher_loss": 0.25062295794487 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.23466432094573975, + "learning_rate": 2.5310316530541883e-05, + "loss": 0.195, + "step": 12286, + "teacher_loss": 0.19064068794250488 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.2840055227279663, + "learning_rate": 2.530866675333175e-05, + "loss": 0.2686, + "step": 12287, + "teacher_loss": 0.2669365704059601 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 1.2123680114746094, + "learning_rate": 2.5307016739774802e-05, + "loss": 0.3259, + "step": 12288, + "teacher_loss": 0.22741109132766724 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.3435225486755371, + "learning_rate": 2.5305366489908858e-05, + "loss": 0.269, + "step": 12289, + "teacher_loss": 0.26074591279029846 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.4015759229660034, + "learning_rate": 2.5303716003771757e-05, + "loss": 0.251, + "step": 12290, + "teacher_loss": 0.23431888222694397 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.13438430428504944, + "learning_rate": 2.5302065281401344e-05, + "loss": 0.1647, + "step": 12291, + "teacher_loss": 0.1680353432893753 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.688004732131958, + "learning_rate": 2.530041432283546e-05, + "loss": 0.4842, + "step": 12292, + "teacher_loss": 0.46154218912124634 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.81205153465271, + "learning_rate": 2.5298763128111956e-05, + "loss": 0.5582, + "step": 12293, + "teacher_loss": 0.5300028324127197 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.45525839924812317, + "learning_rate": 2.529711169726869e-05, + "loss": 0.2474, + "step": 12294, + "teacher_loss": 0.22434329986572266 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.20514178276062012, + "learning_rate": 2.5295460030343528e-05, + "loss": 0.2127, + "step": 12295, + "teacher_loss": 0.21359267830848694 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.24290992319583893, + "learning_rate": 2.529380812737433e-05, + "loss": 0.2109, + "step": 12296, + "teacher_loss": 0.20738664269447327 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.47009631991386414, + "learning_rate": 2.5292155988398984e-05, + "loss": 0.2567, + "step": 12297, + "teacher_loss": 0.2329806685447693 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.5115929841995239, + "learning_rate": 2.529050361345535e-05, + "loss": 0.3712, + "step": 12298, + "teacher_loss": 0.3556061089038849 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.4043690860271454, + "learning_rate": 2.5288851002581323e-05, + "loss": 0.2584, + "step": 12299, + "teacher_loss": 0.24222835898399353 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.8033109903335571, + "learning_rate": 2.528719815581479e-05, + "loss": 0.4493, + "step": 12300, + "teacher_loss": 0.4099869430065155 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.6267677545547485, + "learning_rate": 2.528554507319365e-05, + "loss": 0.3262, + "step": 12301, + "teacher_loss": 0.29279619455337524 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.412370502948761, + "learning_rate": 2.528389175475579e-05, + "loss": 0.2608, + "step": 12302, + "teacher_loss": 0.24394631385803223 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.11264127492904663, + "learning_rate": 2.5282238200539134e-05, + "loss": 0.1502, + "step": 12303, + "teacher_loss": 0.15435829758644104 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.2367410659790039, + "learning_rate": 2.5280584410581575e-05, + "loss": 0.2112, + "step": 12304, + "teacher_loss": 0.20840021967887878 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.5079933404922485, + "learning_rate": 2.5278930384921046e-05, + "loss": 0.36, + "step": 12305, + "teacher_loss": 0.34352245926856995 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.5174824595451355, + "learning_rate": 2.527727612359546e-05, + "loss": 0.2778, + "step": 12306, + "teacher_loss": 0.25112849473953247 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.20662257075309753, + "learning_rate": 2.5275621626642743e-05, + "loss": 0.229, + "step": 12307, + "teacher_loss": 0.23151341080665588 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.4123964309692383, + "learning_rate": 2.5273966894100833e-05, + "loss": 0.1731, + "step": 12308, + "teacher_loss": 0.1465543806552887 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.26879632472991943, + "learning_rate": 2.527231192600766e-05, + "loss": 0.2096, + "step": 12309, + "teacher_loss": 0.20302796363830566 + }, + { + "compression_loss": 0.0, + "epoch": 2.22, + "label_loss": 0.36674627661705017, + "learning_rate": 2.5270656722401175e-05, + "loss": 0.2694, + "step": 12310, + "teacher_loss": 0.25857865810394287 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 1.0059449672698975, + "learning_rate": 2.5269001283319325e-05, + "loss": 0.3877, + "step": 12311, + "teacher_loss": 0.31898999214172363 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.603652834892273, + "learning_rate": 2.526734560880006e-05, + "loss": 0.3063, + "step": 12312, + "teacher_loss": 0.2732313275337219 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.3125823736190796, + "learning_rate": 2.526568969888135e-05, + "loss": 0.2279, + "step": 12313, + "teacher_loss": 0.2184869349002838 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.9605036973953247, + "learning_rate": 2.526403355360115e-05, + "loss": 0.3873, + "step": 12314, + "teacher_loss": 0.32365259528160095 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.689501166343689, + "learning_rate": 2.526237717299743e-05, + "loss": 0.2791, + "step": 12315, + "teacher_loss": 0.23346194624900818 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.6650179028511047, + "learning_rate": 2.5260720557108177e-05, + "loss": 0.3886, + "step": 12316, + "teacher_loss": 0.3578924536705017 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.5012112259864807, + "learning_rate": 2.5259063705971362e-05, + "loss": 0.3322, + "step": 12317, + "teacher_loss": 0.31341585516929626 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.7076420783996582, + "learning_rate": 2.5257406619624973e-05, + "loss": 0.3736, + "step": 12318, + "teacher_loss": 0.33649519085884094 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.1985689401626587, + "learning_rate": 2.5255749298107005e-05, + "loss": 0.2025, + "step": 12319, + "teacher_loss": 0.20289430022239685 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.5653092861175537, + "learning_rate": 2.525409174145545e-05, + "loss": 0.4016, + "step": 12320, + "teacher_loss": 0.38338807225227356 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.2578890919685364, + "learning_rate": 2.525243394970832e-05, + "loss": 0.3023, + "step": 12321, + "teacher_loss": 0.3072129786014557 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.2701598107814789, + "learning_rate": 2.5250775922903617e-05, + "loss": 0.2705, + "step": 12322, + "teacher_loss": 0.2705652713775635 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.40176039934158325, + "learning_rate": 2.524911766107935e-05, + "loss": 0.3017, + "step": 12323, + "teacher_loss": 0.2906301021575928 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.34677693247795105, + "learning_rate": 2.524745916427355e-05, + "loss": 0.2365, + "step": 12324, + "teacher_loss": 0.22424519062042236 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.46364545822143555, + "learning_rate": 2.5245800432524233e-05, + "loss": 0.2939, + "step": 12325, + "teacher_loss": 0.2750820219516754 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.7258146405220032, + "learning_rate": 2.524414146586943e-05, + "loss": 0.2242, + "step": 12326, + "teacher_loss": 0.1684824824333191 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.17541539669036865, + "learning_rate": 2.5242482264347177e-05, + "loss": 0.2386, + "step": 12327, + "teacher_loss": 0.24560345709323883 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.2925964891910553, + "learning_rate": 2.5240822827995513e-05, + "loss": 0.2453, + "step": 12328, + "teacher_loss": 0.24004411697387695 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.49576184153556824, + "learning_rate": 2.5239163156852486e-05, + "loss": 0.2554, + "step": 12329, + "teacher_loss": 0.22866977751255035 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.3057345747947693, + "learning_rate": 2.5237503250956145e-05, + "loss": 0.2151, + "step": 12330, + "teacher_loss": 0.20503583550453186 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.2105225920677185, + "learning_rate": 2.523584311034455e-05, + "loss": 0.2559, + "step": 12331, + "teacher_loss": 0.2609631419181824 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.5388713479042053, + "learning_rate": 2.523418273505576e-05, + "loss": 0.4055, + "step": 12332, + "teacher_loss": 0.3907148241996765 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.2054351270198822, + "learning_rate": 2.5232522125127847e-05, + "loss": 0.2211, + "step": 12333, + "teacher_loss": 0.22279176115989685 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.19322121143341064, + "learning_rate": 2.523086128059888e-05, + "loss": 0.1828, + "step": 12334, + "teacher_loss": 0.18162457644939423 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.16752223670482635, + "learning_rate": 2.522920020150693e-05, + "loss": 0.2175, + "step": 12335, + "teacher_loss": 0.22306691110134125 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.4404308795928955, + "learning_rate": 2.52275388878901e-05, + "loss": 0.2492, + "step": 12336, + "teacher_loss": 0.22796694934368134 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.3641465902328491, + "learning_rate": 2.5225877339786456e-05, + "loss": 0.2439, + "step": 12337, + "teacher_loss": 0.23058444261550903 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.10579530894756317, + "learning_rate": 2.5224215557234108e-05, + "loss": 0.2283, + "step": 12338, + "teacher_loss": 0.24196499586105347 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.20032191276550293, + "learning_rate": 2.5222553540271152e-05, + "loss": 0.1729, + "step": 12339, + "teacher_loss": 0.16986048221588135 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.22834090888500214, + "learning_rate": 2.522089128893569e-05, + "loss": 0.2164, + "step": 12340, + "teacher_loss": 0.21511945128440857 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.2949870824813843, + "learning_rate": 2.5219228803265837e-05, + "loss": 0.1955, + "step": 12341, + "teacher_loss": 0.18441908061504364 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.28125235438346863, + "learning_rate": 2.5217566083299708e-05, + "loss": 0.3397, + "step": 12342, + "teacher_loss": 0.3462051451206207 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.5554111003875732, + "learning_rate": 2.521590312907542e-05, + "loss": 0.3863, + "step": 12343, + "teacher_loss": 0.36748817563056946 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.4666186571121216, + "learning_rate": 2.5214239940631098e-05, + "loss": 0.4264, + "step": 12344, + "teacher_loss": 0.4218958020210266 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.6386550068855286, + "learning_rate": 2.5212576518004884e-05, + "loss": 0.3441, + "step": 12345, + "teacher_loss": 0.31139230728149414 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.3177204728126526, + "learning_rate": 2.5210912861234904e-05, + "loss": 0.2426, + "step": 12346, + "teacher_loss": 0.23427358269691467 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.5241358876228333, + "learning_rate": 2.5209248970359312e-05, + "loss": 0.3523, + "step": 12347, + "teacher_loss": 0.3332063853740692 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.36660993099212646, + "learning_rate": 2.5207584845416245e-05, + "loss": 0.2527, + "step": 12348, + "teacher_loss": 0.24004098773002625 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.3544429540634155, + "learning_rate": 2.5205920486443863e-05, + "loss": 0.1726, + "step": 12349, + "teacher_loss": 0.15240055322647095 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.2600035071372986, + "learning_rate": 2.5204255893480326e-05, + "loss": 0.1647, + "step": 12350, + "teacher_loss": 0.15409821271896362 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.22153496742248535, + "learning_rate": 2.520259106656379e-05, + "loss": 0.2229, + "step": 12351, + "teacher_loss": 0.22299712896347046 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.6594671607017517, + "learning_rate": 2.5200926005732437e-05, + "loss": 0.3902, + "step": 12352, + "teacher_loss": 0.36033380031585693 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.3865515887737274, + "learning_rate": 2.5199260711024425e-05, + "loss": 0.2841, + "step": 12353, + "teacher_loss": 0.27273595333099365 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.2779424488544464, + "learning_rate": 2.519759518247795e-05, + "loss": 0.2179, + "step": 12354, + "teacher_loss": 0.2112237960100174 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.4998508393764496, + "learning_rate": 2.5195929420131194e-05, + "loss": 0.3598, + "step": 12355, + "teacher_loss": 0.3441901206970215 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.9949327707290649, + "learning_rate": 2.5194263424022338e-05, + "loss": 0.3591, + "step": 12356, + "teacher_loss": 0.2884986698627472 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.2694770395755768, + "learning_rate": 2.519259719418959e-05, + "loss": 0.298, + "step": 12357, + "teacher_loss": 0.30119824409484863 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.3596166670322418, + "learning_rate": 2.519093073067115e-05, + "loss": 0.2584, + "step": 12358, + "teacher_loss": 0.2471758872270584 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.3671939969062805, + "learning_rate": 2.5189264033505216e-05, + "loss": 0.213, + "step": 12359, + "teacher_loss": 0.19588702917099 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.6233421564102173, + "learning_rate": 2.5187597102730014e-05, + "loss": 0.2771, + "step": 12360, + "teacher_loss": 0.23864424228668213 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 1.1287585496902466, + "learning_rate": 2.5185929938383748e-05, + "loss": 0.8695, + "step": 12361, + "teacher_loss": 0.840638279914856 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.12235197424888611, + "learning_rate": 2.5184262540504654e-05, + "loss": 0.1577, + "step": 12362, + "teacher_loss": 0.1616610586643219 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.43008720874786377, + "learning_rate": 2.518259490913095e-05, + "loss": 0.2776, + "step": 12363, + "teacher_loss": 0.2606595754623413 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.10917875170707703, + "learning_rate": 2.518092704430087e-05, + "loss": 0.2169, + "step": 12364, + "teacher_loss": 0.22888991236686707 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.7224491834640503, + "learning_rate": 2.5179258946052664e-05, + "loss": 0.3426, + "step": 12365, + "teacher_loss": 0.3003872036933899 + }, + { + "compression_loss": 0.0, + "epoch": 2.23, + "label_loss": 0.46664172410964966, + "learning_rate": 2.5177590614424564e-05, + "loss": 0.1992, + "step": 12366, + "teacher_loss": 0.1694466471672058 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.25799527764320374, + "learning_rate": 2.5175922049454826e-05, + "loss": 0.1859, + "step": 12367, + "teacher_loss": 0.1779094785451889 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.22151395678520203, + "learning_rate": 2.5174253251181707e-05, + "loss": 0.1945, + "step": 12368, + "teacher_loss": 0.1915152221918106 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.28556329011917114, + "learning_rate": 2.5172584219643466e-05, + "loss": 0.2379, + "step": 12369, + "teacher_loss": 0.23260748386383057 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.7169944047927856, + "learning_rate": 2.5170914954878366e-05, + "loss": 0.3269, + "step": 12370, + "teacher_loss": 0.283573716878891 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.26275110244750977, + "learning_rate": 2.516924545692468e-05, + "loss": 0.2019, + "step": 12371, + "teacher_loss": 0.1951197236776352 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.8241665363311768, + "learning_rate": 2.5167575725820684e-05, + "loss": 0.3122, + "step": 12372, + "teacher_loss": 0.25532424449920654 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.4831430912017822, + "learning_rate": 2.516590576160466e-05, + "loss": 0.2744, + "step": 12373, + "teacher_loss": 0.2511821985244751 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 1.5642801523208618, + "learning_rate": 2.5164235564314896e-05, + "loss": 0.3503, + "step": 12374, + "teacher_loss": 0.21542152762413025 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 1.031935214996338, + "learning_rate": 2.5162565133989685e-05, + "loss": 0.4997, + "step": 12375, + "teacher_loss": 0.44060200452804565 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.38501325249671936, + "learning_rate": 2.516089447066732e-05, + "loss": 0.2633, + "step": 12376, + "teacher_loss": 0.24980224668979645 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.34710633754730225, + "learning_rate": 2.5159223574386117e-05, + "loss": 0.2682, + "step": 12377, + "teacher_loss": 0.2593998610973358 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.5922276973724365, + "learning_rate": 2.5157552445184372e-05, + "loss": 0.2502, + "step": 12378, + "teacher_loss": 0.21220096945762634 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.2799423933029175, + "learning_rate": 2.5155881083100402e-05, + "loss": 0.2017, + "step": 12379, + "teacher_loss": 0.19295164942741394 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.27575406432151794, + "learning_rate": 2.515420948817253e-05, + "loss": 0.249, + "step": 12380, + "teacher_loss": 0.24604164063930511 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.7159192562103271, + "learning_rate": 2.5152537660439078e-05, + "loss": 0.3301, + "step": 12381, + "teacher_loss": 0.2872636914253235 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.43319171667099, + "learning_rate": 2.5150865599938377e-05, + "loss": 0.4257, + "step": 12382, + "teacher_loss": 0.42484402656555176 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.4488913118839264, + "learning_rate": 2.514919330670876e-05, + "loss": 0.2145, + "step": 12383, + "teacher_loss": 0.188436821103096 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.7564199566841125, + "learning_rate": 2.514752078078857e-05, + "loss": 0.3163, + "step": 12384, + "teacher_loss": 0.2674005925655365 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.5323621034622192, + "learning_rate": 2.5145848022216157e-05, + "loss": 0.2552, + "step": 12385, + "teacher_loss": 0.22435428202152252 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.4655833840370178, + "learning_rate": 2.5144175031029863e-05, + "loss": 0.2853, + "step": 12386, + "teacher_loss": 0.2652449905872345 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.27367931604385376, + "learning_rate": 2.514250180726805e-05, + "loss": 0.2329, + "step": 12387, + "teacher_loss": 0.22837644815444946 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.2601550221443176, + "learning_rate": 2.5140828350969076e-05, + "loss": 0.1812, + "step": 12388, + "teacher_loss": 0.17247986793518066 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.352405309677124, + "learning_rate": 2.5139154662171316e-05, + "loss": 0.2368, + "step": 12389, + "teacher_loss": 0.22398895025253296 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.4569525420665741, + "learning_rate": 2.5137480740913136e-05, + "loss": 0.1855, + "step": 12390, + "teacher_loss": 0.15537753701210022 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.598187267780304, + "learning_rate": 2.513580658723292e-05, + "loss": 0.1885, + "step": 12391, + "teacher_loss": 0.14298535883426666 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.3036641478538513, + "learning_rate": 2.5134132201169042e-05, + "loss": 0.1919, + "step": 12392, + "teacher_loss": 0.17942848801612854 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.298325777053833, + "learning_rate": 2.51324575827599e-05, + "loss": 0.2851, + "step": 12393, + "teacher_loss": 0.28361421823501587 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.1908550262451172, + "learning_rate": 2.5130782732043885e-05, + "loss": 0.1807, + "step": 12394, + "teacher_loss": 0.17959949374198914 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.08722509443759918, + "learning_rate": 2.5129107649059395e-05, + "loss": 0.22, + "step": 12395, + "teacher_loss": 0.23478442430496216 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.5926755666732788, + "learning_rate": 2.5127432333844836e-05, + "loss": 0.35, + "step": 12396, + "teacher_loss": 0.32299649715423584 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.981011152267456, + "learning_rate": 2.5125756786438615e-05, + "loss": 0.4414, + "step": 12397, + "teacher_loss": 0.3814009428024292 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.3214312195777893, + "learning_rate": 2.5124081006879148e-05, + "loss": 0.2711, + "step": 12398, + "teacher_loss": 0.265512615442276 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.6141905784606934, + "learning_rate": 2.512240499520486e-05, + "loss": 0.2893, + "step": 12399, + "teacher_loss": 0.253153920173645 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.6215115785598755, + "learning_rate": 2.5120728751454175e-05, + "loss": 0.2918, + "step": 12400, + "teacher_loss": 0.25519269704818726 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.5350840091705322, + "learning_rate": 2.511905227566552e-05, + "loss": 0.2205, + "step": 12401, + "teacher_loss": 0.18551771342754364 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.28319722414016724, + "learning_rate": 2.5117375567877335e-05, + "loss": 0.1994, + "step": 12402, + "teacher_loss": 0.190045565366745 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.3871760964393616, + "learning_rate": 2.5115698628128065e-05, + "loss": 0.2039, + "step": 12403, + "teacher_loss": 0.18352742493152618 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.38654810190200806, + "learning_rate": 2.511402145645615e-05, + "loss": 0.1853, + "step": 12404, + "teacher_loss": 0.16290079057216644 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.7009180784225464, + "learning_rate": 2.511234405290005e-05, + "loss": 0.267, + "step": 12405, + "teacher_loss": 0.21875181794166565 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.37582865357398987, + "learning_rate": 2.5110666417498217e-05, + "loss": 0.2261, + "step": 12406, + "teacher_loss": 0.20947568118572235 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.4072246551513672, + "learning_rate": 2.510898855028912e-05, + "loss": 0.2537, + "step": 12407, + "teacher_loss": 0.23660865426063538 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.4788782298564911, + "learning_rate": 2.510731045131122e-05, + "loss": 0.2341, + "step": 12408, + "teacher_loss": 0.20688827335834503 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.3572787344455719, + "learning_rate": 2.5105632120602994e-05, + "loss": 0.2772, + "step": 12409, + "teacher_loss": 0.26835209131240845 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.9476114511489868, + "learning_rate": 2.5103953558202925e-05, + "loss": 0.3384, + "step": 12410, + "teacher_loss": 0.27070653438568115 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.27621549367904663, + "learning_rate": 2.5102274764149493e-05, + "loss": 0.2388, + "step": 12411, + "teacher_loss": 0.234610915184021 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.5277109146118164, + "learning_rate": 2.5100595738481187e-05, + "loss": 0.3384, + "step": 12412, + "teacher_loss": 0.31734293699264526 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.3266430199146271, + "learning_rate": 2.5098916481236505e-05, + "loss": 0.3007, + "step": 12413, + "teacher_loss": 0.2977985143661499 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.43012717366218567, + "learning_rate": 2.5097236992453945e-05, + "loss": 0.3224, + "step": 12414, + "teacher_loss": 0.3104206323623657 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.8111578226089478, + "learning_rate": 2.5095557272172015e-05, + "loss": 0.3192, + "step": 12415, + "teacher_loss": 0.2645253539085388 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.35283225774765015, + "learning_rate": 2.5093877320429227e-05, + "loss": 0.2315, + "step": 12416, + "teacher_loss": 0.21797379851341248 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.6910042762756348, + "learning_rate": 2.5092197137264093e-05, + "loss": 0.2358, + "step": 12417, + "teacher_loss": 0.1852441281080246 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.9138078689575195, + "learning_rate": 2.5090516722715138e-05, + "loss": 0.3345, + "step": 12418, + "teacher_loss": 0.2701514661312103 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.3401826322078705, + "learning_rate": 2.5088836076820885e-05, + "loss": 0.231, + "step": 12419, + "teacher_loss": 0.2188795804977417 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 0.38918742537498474, + "learning_rate": 2.508715519961987e-05, + "loss": 0.2489, + "step": 12420, + "teacher_loss": 0.23334649205207825 + }, + { + "compression_loss": 0.0, + "epoch": 2.24, + "label_loss": 1.1607577800750732, + "learning_rate": 2.508547409115063e-05, + "loss": 0.56, + "step": 12421, + "teacher_loss": 0.4932183623313904 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.3616155683994293, + "learning_rate": 2.5083792751451707e-05, + "loss": 0.3087, + "step": 12422, + "teacher_loss": 0.3028377890586853 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 1.1060240268707275, + "learning_rate": 2.5082111180561646e-05, + "loss": 0.3738, + "step": 12423, + "teacher_loss": 0.29246437549591064 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.18614168465137482, + "learning_rate": 2.5080429378519004e-05, + "loss": 0.2952, + "step": 12424, + "teacher_loss": 0.3073011338710785 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.20125222206115723, + "learning_rate": 2.507874734536234e-05, + "loss": 0.1697, + "step": 12425, + "teacher_loss": 0.16613906621932983 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 1.0296711921691895, + "learning_rate": 2.5077065081130217e-05, + "loss": 0.3491, + "step": 12426, + "teacher_loss": 0.27345359325408936 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.3762807846069336, + "learning_rate": 2.5075382585861205e-05, + "loss": 0.3469, + "step": 12427, + "teacher_loss": 0.3435822129249573 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.2760469913482666, + "learning_rate": 2.5073699859593874e-05, + "loss": 0.1915, + "step": 12428, + "teacher_loss": 0.18213629722595215 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.4713650047779083, + "learning_rate": 2.5072016902366818e-05, + "loss": 0.2741, + "step": 12429, + "teacher_loss": 0.25217410922050476 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.3024109899997711, + "learning_rate": 2.5070333714218604e-05, + "loss": 0.2178, + "step": 12430, + "teacher_loss": 0.20842163264751434 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.21881170570850372, + "learning_rate": 2.506865029518783e-05, + "loss": 0.202, + "step": 12431, + "teacher_loss": 0.20007693767547607 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.5328092575073242, + "learning_rate": 2.506696664531309e-05, + "loss": 0.3248, + "step": 12432, + "teacher_loss": 0.30166205763816833 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.11480280011892319, + "learning_rate": 2.5065282764632992e-05, + "loss": 0.2431, + "step": 12433, + "teacher_loss": 0.2573142647743225 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.3078177869319916, + "learning_rate": 2.5063598653186143e-05, + "loss": 0.3077, + "step": 12434, + "teacher_loss": 0.3077346682548523 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.594077467918396, + "learning_rate": 2.506191431101114e-05, + "loss": 0.2496, + "step": 12435, + "teacher_loss": 0.21134255826473236 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.32119521498680115, + "learning_rate": 2.506022973814661e-05, + "loss": 0.2102, + "step": 12436, + "teacher_loss": 0.19791030883789062 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.3036797046661377, + "learning_rate": 2.5058544934631178e-05, + "loss": 0.3126, + "step": 12437, + "teacher_loss": 0.31364506483078003 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.468548059463501, + "learning_rate": 2.5056859900503462e-05, + "loss": 0.4161, + "step": 12438, + "teacher_loss": 0.41031479835510254 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.26208731532096863, + "learning_rate": 2.5055174635802106e-05, + "loss": 0.2121, + "step": 12439, + "teacher_loss": 0.2065260410308838 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.7338371872901917, + "learning_rate": 2.5053489140565737e-05, + "loss": 0.2328, + "step": 12440, + "teacher_loss": 0.17709453403949738 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.718498945236206, + "learning_rate": 2.5051803414833008e-05, + "loss": 0.507, + "step": 12441, + "teacher_loss": 0.48351842164993286 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.5107282400131226, + "learning_rate": 2.5050117458642566e-05, + "loss": 0.2644, + "step": 12442, + "teacher_loss": 0.2370738983154297 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.5591605305671692, + "learning_rate": 2.5048431272033058e-05, + "loss": 0.2987, + "step": 12443, + "teacher_loss": 0.2697696089744568 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.34486252069473267, + "learning_rate": 2.5046744855043147e-05, + "loss": 0.2359, + "step": 12444, + "teacher_loss": 0.22376489639282227 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.24425595998764038, + "learning_rate": 2.5045058207711503e-05, + "loss": 0.2221, + "step": 12445, + "teacher_loss": 0.2196749746799469 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.1774607002735138, + "learning_rate": 2.504337133007679e-05, + "loss": 0.1448, + "step": 12446, + "teacher_loss": 0.14113838970661163 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.579138994216919, + "learning_rate": 2.504168422217768e-05, + "loss": 0.3361, + "step": 12447, + "teacher_loss": 0.30908310413360596 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.8592791557312012, + "learning_rate": 2.503999688405286e-05, + "loss": 0.6142, + "step": 12448, + "teacher_loss": 0.5869715213775635 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.5177626609802246, + "learning_rate": 2.5038309315741013e-05, + "loss": 0.2527, + "step": 12449, + "teacher_loss": 0.2232607901096344 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.7014118432998657, + "learning_rate": 2.503662151728083e-05, + "loss": 0.315, + "step": 12450, + "teacher_loss": 0.2720857858657837 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.7837603092193604, + "learning_rate": 2.5034933488711004e-05, + "loss": 0.3439, + "step": 12451, + "teacher_loss": 0.29497236013412476 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.6769936084747314, + "learning_rate": 2.5033245230070238e-05, + "loss": 0.3023, + "step": 12452, + "teacher_loss": 0.2606814503669739 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.1808786541223526, + "learning_rate": 2.5031556741397248e-05, + "loss": 0.1769, + "step": 12453, + "teacher_loss": 0.17642217874526978 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.4352392256259918, + "learning_rate": 2.5029868022730734e-05, + "loss": 0.3776, + "step": 12454, + "teacher_loss": 0.3712334930896759 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.23163536190986633, + "learning_rate": 2.5028179074109418e-05, + "loss": 0.1717, + "step": 12455, + "teacher_loss": 0.16507402062416077 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.3340536952018738, + "learning_rate": 2.502648989557202e-05, + "loss": 0.2201, + "step": 12456, + "teacher_loss": 0.20744773745536804 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.30356213450431824, + "learning_rate": 2.502480048715727e-05, + "loss": 0.1938, + "step": 12457, + "teacher_loss": 0.18157672882080078 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.5310879945755005, + "learning_rate": 2.50231108489039e-05, + "loss": 0.3114, + "step": 12458, + "teacher_loss": 0.286965012550354 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.27941781282424927, + "learning_rate": 2.5021420980850653e-05, + "loss": 0.3636, + "step": 12459, + "teacher_loss": 0.3730059862136841 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.4968075454235077, + "learning_rate": 2.5019730883036265e-05, + "loss": 0.2928, + "step": 12460, + "teacher_loss": 0.2701363265514374 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.8067477345466614, + "learning_rate": 2.501804055549949e-05, + "loss": 0.3855, + "step": 12461, + "teacher_loss": 0.3387216329574585 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.2549434304237366, + "learning_rate": 2.5016349998279083e-05, + "loss": 0.1938, + "step": 12462, + "teacher_loss": 0.18702520430088043 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.19229668378829956, + "learning_rate": 2.5014659211413797e-05, + "loss": 0.2082, + "step": 12463, + "teacher_loss": 0.21000494062900543 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.2706279158592224, + "learning_rate": 2.5012968194942402e-05, + "loss": 0.237, + "step": 12464, + "teacher_loss": 0.2332686334848404 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.7078277468681335, + "learning_rate": 2.501127694890367e-05, + "loss": 0.2465, + "step": 12465, + "teacher_loss": 0.1952914595603943 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 1.0988714694976807, + "learning_rate": 2.5009585473336367e-05, + "loss": 0.7812, + "step": 12466, + "teacher_loss": 0.7459425926208496 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.8042713403701782, + "learning_rate": 2.5007893768279286e-05, + "loss": 0.3015, + "step": 12467, + "teacher_loss": 0.2455812245607376 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.17142805457115173, + "learning_rate": 2.50062018337712e-05, + "loss": 0.2256, + "step": 12468, + "teacher_loss": 0.2316223531961441 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.884581983089447, + "learning_rate": 2.500450966985091e-05, + "loss": 0.3603, + "step": 12469, + "teacher_loss": 0.30199873447418213 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.2815982401371002, + "learning_rate": 2.5002817276557206e-05, + "loss": 0.1867, + "step": 12470, + "teacher_loss": 0.1761147677898407 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.25426995754241943, + "learning_rate": 2.500112465392889e-05, + "loss": 0.2518, + "step": 12471, + "teacher_loss": 0.2514955401420593 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.926116943359375, + "learning_rate": 2.4999431802004772e-05, + "loss": 0.2665, + "step": 12472, + "teacher_loss": 0.19315484166145325 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.45118045806884766, + "learning_rate": 2.4997738720823666e-05, + "loss": 0.2295, + "step": 12473, + "teacher_loss": 0.2048342376947403 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.48730432987213135, + "learning_rate": 2.4996045410424386e-05, + "loss": 0.3418, + "step": 12474, + "teacher_loss": 0.3256661891937256 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.6854239702224731, + "learning_rate": 2.4994351870845752e-05, + "loss": 0.3122, + "step": 12475, + "teacher_loss": 0.2707221508026123 + }, + { + "compression_loss": 0.0, + "epoch": 2.25, + "label_loss": 0.6844598054885864, + "learning_rate": 2.4992658102126594e-05, + "loss": 0.3126, + "step": 12476, + "teacher_loss": 0.27124080061912537 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.6983169317245483, + "learning_rate": 2.499096410430574e-05, + "loss": 0.4343, + "step": 12477, + "teacher_loss": 0.40491223335266113 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.31748753786087036, + "learning_rate": 2.4989269877422047e-05, + "loss": 0.2389, + "step": 12478, + "teacher_loss": 0.230166956782341 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.1992478370666504, + "learning_rate": 2.4987575421514338e-05, + "loss": 0.2306, + "step": 12479, + "teacher_loss": 0.234049454331398 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.26578769087791443, + "learning_rate": 2.4985880736621467e-05, + "loss": 0.1775, + "step": 12480, + "teacher_loss": 0.16771894693374634 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.2331969141960144, + "learning_rate": 2.498418582278229e-05, + "loss": 0.3109, + "step": 12481, + "teacher_loss": 0.3195742070674896 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.3971533179283142, + "learning_rate": 2.4982490680035668e-05, + "loss": 0.3003, + "step": 12482, + "teacher_loss": 0.28949248790740967 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.31105104088783264, + "learning_rate": 2.498079530842046e-05, + "loss": 0.2199, + "step": 12483, + "teacher_loss": 0.2097305804491043 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.6346704959869385, + "learning_rate": 2.497909970797555e-05, + "loss": 0.2819, + "step": 12484, + "teacher_loss": 0.24269193410873413 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.5110999345779419, + "learning_rate": 2.4977403878739793e-05, + "loss": 0.2657, + "step": 12485, + "teacher_loss": 0.23842313885688782 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.5784667730331421, + "learning_rate": 2.4975707820752078e-05, + "loss": 0.2474, + "step": 12486, + "teacher_loss": 0.2106650024652481 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.6825512051582336, + "learning_rate": 2.4974011534051297e-05, + "loss": 0.2365, + "step": 12487, + "teacher_loss": 0.1869291514158249 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.4182283282279968, + "learning_rate": 2.4972315018676334e-05, + "loss": 0.4759, + "step": 12488, + "teacher_loss": 0.4823169708251953 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.5324483513832092, + "learning_rate": 2.4970618274666087e-05, + "loss": 0.2425, + "step": 12489, + "teacher_loss": 0.21033841371536255 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.43507567048072815, + "learning_rate": 2.4968921302059452e-05, + "loss": 0.3342, + "step": 12490, + "teacher_loss": 0.3229830265045166 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.5093084573745728, + "learning_rate": 2.4967224100895342e-05, + "loss": 0.2482, + "step": 12491, + "teacher_loss": 0.21914741396903992 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.612106204032898, + "learning_rate": 2.4965526671212666e-05, + "loss": 0.3166, + "step": 12492, + "teacher_loss": 0.2837115526199341 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.48089852929115295, + "learning_rate": 2.4963829013050344e-05, + "loss": 0.2472, + "step": 12493, + "teacher_loss": 0.22123689949512482 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.31869077682495117, + "learning_rate": 2.4962131126447292e-05, + "loss": 0.1997, + "step": 12494, + "teacher_loss": 0.18648570775985718 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.37320131063461304, + "learning_rate": 2.4960433011442445e-05, + "loss": 0.2413, + "step": 12495, + "teacher_loss": 0.22659853100776672 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.5218843221664429, + "learning_rate": 2.495873466807473e-05, + "loss": 0.2672, + "step": 12496, + "teacher_loss": 0.238898366689682 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.5973473191261292, + "learning_rate": 2.4957036096383093e-05, + "loss": 0.4843, + "step": 12497, + "teacher_loss": 0.4716907739639282 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.8592730760574341, + "learning_rate": 2.4955337296406464e-05, + "loss": 0.3459, + "step": 12498, + "teacher_loss": 0.2888760566711426 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.5340474843978882, + "learning_rate": 2.4953638268183796e-05, + "loss": 0.3955, + "step": 12499, + "teacher_loss": 0.3801245093345642 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.4711238741874695, + "learning_rate": 2.495193901175405e-05, + "loss": 0.4534, + "step": 12500, + "teacher_loss": 0.45140478014945984 + }, + { + "epoch": 2.26, + "eval_exact_match": 79.66887417218543, + "eval_f1": 86.99380135903661, + "step": 12500 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.5490753650665283, + "learning_rate": 2.495023952715618e-05, + "loss": 0.2264, + "step": 12501, + "teacher_loss": 0.19051897525787354 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.2956361472606659, + "learning_rate": 2.4948539814429148e-05, + "loss": 0.3202, + "step": 12502, + "teacher_loss": 0.32292336225509644 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.4812681972980499, + "learning_rate": 2.494683987361193e-05, + "loss": 0.3406, + "step": 12503, + "teacher_loss": 0.3249479830265045 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.24697396159172058, + "learning_rate": 2.494513970474349e-05, + "loss": 0.2478, + "step": 12504, + "teacher_loss": 0.24792295694351196 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.3574371039867401, + "learning_rate": 2.4943439307862818e-05, + "loss": 0.2795, + "step": 12505, + "teacher_loss": 0.27086251974105835 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.25199514627456665, + "learning_rate": 2.4941738683008892e-05, + "loss": 0.207, + "step": 12506, + "teacher_loss": 0.20205508172512054 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.3918592631816864, + "learning_rate": 2.4940037830220705e-05, + "loss": 0.2103, + "step": 12507, + "teacher_loss": 0.19016845524311066 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.25740641355514526, + "learning_rate": 2.4938336749537253e-05, + "loss": 0.2962, + "step": 12508, + "teacher_loss": 0.3005411922931671 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.686660647392273, + "learning_rate": 2.493663544099754e-05, + "loss": 0.3121, + "step": 12509, + "teacher_loss": 0.2705281674861908 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.2648322582244873, + "learning_rate": 2.493493390464056e-05, + "loss": 0.1813, + "step": 12510, + "teacher_loss": 0.17202773690223694 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.28975337743759155, + "learning_rate": 2.493323214050534e-05, + "loss": 0.2357, + "step": 12511, + "teacher_loss": 0.22966080904006958 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.39938560128211975, + "learning_rate": 2.4931530148630883e-05, + "loss": 0.2839, + "step": 12512, + "teacher_loss": 0.27103251218795776 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.15705978870391846, + "learning_rate": 2.4929827929056213e-05, + "loss": 0.1816, + "step": 12513, + "teacher_loss": 0.1842774599790573 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.7605505585670471, + "learning_rate": 2.4928125481820368e-05, + "loss": 0.2459, + "step": 12514, + "teacher_loss": 0.1887163519859314 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.3978753685951233, + "learning_rate": 2.492642280696237e-05, + "loss": 0.2234, + "step": 12515, + "teacher_loss": 0.2039695680141449 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.20168451964855194, + "learning_rate": 2.4924719904521254e-05, + "loss": 0.1695, + "step": 12516, + "teacher_loss": 0.16588471829891205 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.9932999014854431, + "learning_rate": 2.4923016774536067e-05, + "loss": 0.3706, + "step": 12517, + "teacher_loss": 0.30135998129844666 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.592831552028656, + "learning_rate": 2.4921313417045857e-05, + "loss": 0.3943, + "step": 12518, + "teacher_loss": 0.37229329347610474 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.6639014482498169, + "learning_rate": 2.4919609832089676e-05, + "loss": 0.4947, + "step": 12519, + "teacher_loss": 0.47589507699012756 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.1901385635137558, + "learning_rate": 2.491790601970658e-05, + "loss": 0.1983, + "step": 12520, + "teacher_loss": 0.19923299551010132 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.25974661111831665, + "learning_rate": 2.491620197993564e-05, + "loss": 0.2114, + "step": 12521, + "teacher_loss": 0.20604172348976135 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.45143961906433105, + "learning_rate": 2.4914497712815917e-05, + "loss": 0.4274, + "step": 12522, + "teacher_loss": 0.42470675706863403 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.6493076682090759, + "learning_rate": 2.4912793218386484e-05, + "loss": 0.3064, + "step": 12523, + "teacher_loss": 0.26826074719429016 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.6655941605567932, + "learning_rate": 2.4911088496686422e-05, + "loss": 0.2173, + "step": 12524, + "teacher_loss": 0.1674552857875824 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.5885056257247925, + "learning_rate": 2.4909383547754817e-05, + "loss": 0.3462, + "step": 12525, + "teacher_loss": 0.3192649483680725 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.4662756621837616, + "learning_rate": 2.4907678371630757e-05, + "loss": 0.2976, + "step": 12526, + "teacher_loss": 0.2788658142089844 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.2689884305000305, + "learning_rate": 2.4905972968353343e-05, + "loss": 0.2023, + "step": 12527, + "teacher_loss": 0.19487161934375763 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.3776562809944153, + "learning_rate": 2.490426733796166e-05, + "loss": 0.2818, + "step": 12528, + "teacher_loss": 0.2711542546749115 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.38208937644958496, + "learning_rate": 2.4902561480494826e-05, + "loss": 0.2328, + "step": 12529, + "teacher_loss": 0.21626605093479156 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.38997215032577515, + "learning_rate": 2.4900855395991942e-05, + "loss": 0.2399, + "step": 12530, + "teacher_loss": 0.22323068976402283 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.5471556186676025, + "learning_rate": 2.489914908449213e-05, + "loss": 0.3361, + "step": 12531, + "teacher_loss": 0.3126351833343506 + }, + { + "compression_loss": 0.0, + "epoch": 2.26, + "label_loss": 0.25133901834487915, + "learning_rate": 2.4897442546034512e-05, + "loss": 0.2212, + "step": 12532, + "teacher_loss": 0.21784579753875732 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.44157394766807556, + "learning_rate": 2.489573578065821e-05, + "loss": 0.2291, + "step": 12533, + "teacher_loss": 0.20552313327789307 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.4130289852619171, + "learning_rate": 2.4894028788402352e-05, + "loss": 0.2631, + "step": 12534, + "teacher_loss": 0.24649138748645782 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.4055495262145996, + "learning_rate": 2.489232156930608e-05, + "loss": 0.2167, + "step": 12535, + "teacher_loss": 0.1957492232322693 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.5434260368347168, + "learning_rate": 2.489061412340853e-05, + "loss": 0.3274, + "step": 12536, + "teacher_loss": 0.30337586998939514 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.32822442054748535, + "learning_rate": 2.488890645074886e-05, + "loss": 0.2183, + "step": 12537, + "teacher_loss": 0.206126406788826 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.3327506184577942, + "learning_rate": 2.4887198551366206e-05, + "loss": 0.272, + "step": 12538, + "teacher_loss": 0.2652987837791443 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.3866737484931946, + "learning_rate": 2.4885490425299736e-05, + "loss": 0.2614, + "step": 12539, + "teacher_loss": 0.24746572971343994 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.5399439930915833, + "learning_rate": 2.4883782072588604e-05, + "loss": 0.2845, + "step": 12540, + "teacher_loss": 0.2560719847679138 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.5061490535736084, + "learning_rate": 2.4882073493271988e-05, + "loss": 0.296, + "step": 12541, + "teacher_loss": 0.2726753354072571 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.10065954178571701, + "learning_rate": 2.488036468738905e-05, + "loss": 0.1788, + "step": 12542, + "teacher_loss": 0.18744251132011414 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.26066139340400696, + "learning_rate": 2.4878655654978973e-05, + "loss": 0.3447, + "step": 12543, + "teacher_loss": 0.3539879620075226 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.5510700941085815, + "learning_rate": 2.4876946396080946e-05, + "loss": 0.3587, + "step": 12544, + "teacher_loss": 0.3372902572154999 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.2775374948978424, + "learning_rate": 2.4875236910734145e-05, + "loss": 0.1607, + "step": 12545, + "teacher_loss": 0.14766675233840942 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.5086871981620789, + "learning_rate": 2.487352719897777e-05, + "loss": 0.2464, + "step": 12546, + "teacher_loss": 0.21727171540260315 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.5366396307945251, + "learning_rate": 2.487181726085102e-05, + "loss": 0.2226, + "step": 12547, + "teacher_loss": 0.1876683235168457 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.6070277690887451, + "learning_rate": 2.4870107096393095e-05, + "loss": 0.2706, + "step": 12548, + "teacher_loss": 0.233234703540802 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.21357940137386322, + "learning_rate": 2.486839670564321e-05, + "loss": 0.1809, + "step": 12549, + "teacher_loss": 0.17726486921310425 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.8291566371917725, + "learning_rate": 2.4866686088640574e-05, + "loss": 0.3084, + "step": 12550, + "teacher_loss": 0.2505527138710022 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.7683846950531006, + "learning_rate": 2.4864975245424403e-05, + "loss": 0.3819, + "step": 12551, + "teacher_loss": 0.3389424681663513 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.4099001884460449, + "learning_rate": 2.4863264176033936e-05, + "loss": 0.1966, + "step": 12552, + "teacher_loss": 0.1729380488395691 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.45399191975593567, + "learning_rate": 2.4861552880508385e-05, + "loss": 0.2722, + "step": 12553, + "teacher_loss": 0.25204190611839294 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.42610734701156616, + "learning_rate": 2.4859841358886993e-05, + "loss": 0.299, + "step": 12554, + "teacher_loss": 0.2848728895187378 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.46575161814689636, + "learning_rate": 2.4858129611209005e-05, + "loss": 0.5935, + "step": 12555, + "teacher_loss": 0.607714056968689 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.8861938118934631, + "learning_rate": 2.485641763751366e-05, + "loss": 0.3314, + "step": 12556, + "teacher_loss": 0.26979681849479675 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.3615124821662903, + "learning_rate": 2.485470543784021e-05, + "loss": 0.2185, + "step": 12557, + "teacher_loss": 0.20262214541435242 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.6139078140258789, + "learning_rate": 2.4852993012227908e-05, + "loss": 0.3569, + "step": 12558, + "teacher_loss": 0.32838189601898193 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.3575991988182068, + "learning_rate": 2.4851280360716017e-05, + "loss": 0.2659, + "step": 12559, + "teacher_loss": 0.255714476108551 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.17090153694152832, + "learning_rate": 2.4849567483343803e-05, + "loss": 0.1647, + "step": 12560, + "teacher_loss": 0.16402272880077362 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.2501254081726074, + "learning_rate": 2.4847854380150535e-05, + "loss": 0.2092, + "step": 12561, + "teacher_loss": 0.20463499426841736 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.22109168767929077, + "learning_rate": 2.48461410511755e-05, + "loss": 0.2612, + "step": 12562, + "teacher_loss": 0.26566118001937866 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.2977604568004608, + "learning_rate": 2.4844427496457966e-05, + "loss": 0.2298, + "step": 12563, + "teacher_loss": 0.22224295139312744 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.5917311906814575, + "learning_rate": 2.484271371603722e-05, + "loss": 0.3686, + "step": 12564, + "teacher_loss": 0.34376245737075806 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.29340487718582153, + "learning_rate": 2.4840999709952563e-05, + "loss": 0.178, + "step": 12565, + "teacher_loss": 0.16520051658153534 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.5470061302185059, + "learning_rate": 2.4839285478243286e-05, + "loss": 0.2867, + "step": 12566, + "teacher_loss": 0.2577952742576599 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.3474600315093994, + "learning_rate": 2.4837571020948695e-05, + "loss": 0.2905, + "step": 12567, + "teacher_loss": 0.28420406579971313 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.28272077441215515, + "learning_rate": 2.4835856338108095e-05, + "loss": 0.1794, + "step": 12568, + "teacher_loss": 0.16790196299552917 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.4079461693763733, + "learning_rate": 2.48341414297608e-05, + "loss": 0.2594, + "step": 12569, + "teacher_loss": 0.24289198219776154 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.394878625869751, + "learning_rate": 2.483242629594612e-05, + "loss": 0.1792, + "step": 12570, + "teacher_loss": 0.15524597465991974 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.38712814450263977, + "learning_rate": 2.4830710936703392e-05, + "loss": 0.2225, + "step": 12571, + "teacher_loss": 0.2041681706905365 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.4921199679374695, + "learning_rate": 2.4828995352071927e-05, + "loss": 0.273, + "step": 12572, + "teacher_loss": 0.24862496554851532 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.2518167495727539, + "learning_rate": 2.4827279542091072e-05, + "loss": 0.2261, + "step": 12573, + "teacher_loss": 0.2232484519481659 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.29522982239723206, + "learning_rate": 2.4825563506800162e-05, + "loss": 0.2395, + "step": 12574, + "teacher_loss": 0.23333227634429932 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.6245726346969604, + "learning_rate": 2.482384724623854e-05, + "loss": 0.2636, + "step": 12575, + "teacher_loss": 0.22354191541671753 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.31734150648117065, + "learning_rate": 2.482213076044555e-05, + "loss": 0.2549, + "step": 12576, + "teacher_loss": 0.24794290959835052 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.37666282057762146, + "learning_rate": 2.4820414049460556e-05, + "loss": 0.2871, + "step": 12577, + "teacher_loss": 0.2771133780479431 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.4308215081691742, + "learning_rate": 2.4818697113322907e-05, + "loss": 0.2576, + "step": 12578, + "teacher_loss": 0.23832008242607117 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.548661470413208, + "learning_rate": 2.4816979952071974e-05, + "loss": 0.2596, + "step": 12579, + "teacher_loss": 0.2275036871433258 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.23549598455429077, + "learning_rate": 2.4815262565747118e-05, + "loss": 0.2242, + "step": 12580, + "teacher_loss": 0.22291819751262665 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.2603442668914795, + "learning_rate": 2.481354495438772e-05, + "loss": 0.2423, + "step": 12581, + "teacher_loss": 0.24032625555992126 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.2904854714870453, + "learning_rate": 2.4811827118033163e-05, + "loss": 0.2392, + "step": 12582, + "teacher_loss": 0.23345373570919037 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.24639549851417542, + "learning_rate": 2.4810109056722825e-05, + "loss": 0.2424, + "step": 12583, + "teacher_loss": 0.24196115136146545 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.7632948756217957, + "learning_rate": 2.48083907704961e-05, + "loss": 0.399, + "step": 12584, + "teacher_loss": 0.35852521657943726 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.5758053064346313, + "learning_rate": 2.480667225939238e-05, + "loss": 0.2988, + "step": 12585, + "teacher_loss": 0.2680344581604004 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.39537566900253296, + "learning_rate": 2.480495352345107e-05, + "loss": 0.2271, + "step": 12586, + "teacher_loss": 0.20835396647453308 + }, + { + "compression_loss": 0.0, + "epoch": 2.27, + "label_loss": 0.5928500294685364, + "learning_rate": 2.4803234562711566e-05, + "loss": 0.3838, + "step": 12587, + "teacher_loss": 0.3605412244796753 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.6006373167037964, + "learning_rate": 2.4801515377213293e-05, + "loss": 0.2344, + "step": 12588, + "teacher_loss": 0.19367870688438416 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.35082510113716125, + "learning_rate": 2.4799795966995654e-05, + "loss": 0.1777, + "step": 12589, + "teacher_loss": 0.158429816365242 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.6805994510650635, + "learning_rate": 2.479807633209808e-05, + "loss": 0.3202, + "step": 12590, + "teacher_loss": 0.2801085114479065 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.6047331094741821, + "learning_rate": 2.4796356472559984e-05, + "loss": 0.3591, + "step": 12591, + "teacher_loss": 0.33181139826774597 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.34327465295791626, + "learning_rate": 2.4794636388420812e-05, + "loss": 0.2174, + "step": 12592, + "teacher_loss": 0.20337122678756714 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.39451533555984497, + "learning_rate": 2.4792916079719988e-05, + "loss": 0.4363, + "step": 12593, + "teacher_loss": 0.44098925590515137 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.20249181985855103, + "learning_rate": 2.4791195546496964e-05, + "loss": 0.175, + "step": 12594, + "teacher_loss": 0.17193642258644104 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.5126531720161438, + "learning_rate": 2.4789474788791178e-05, + "loss": 0.428, + "step": 12595, + "teacher_loss": 0.41857418417930603 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.6117051839828491, + "learning_rate": 2.4787753806642088e-05, + "loss": 0.3943, + "step": 12596, + "teacher_loss": 0.37014323472976685 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.352652370929718, + "learning_rate": 2.4786032600089145e-05, + "loss": 0.2082, + "step": 12597, + "teacher_loss": 0.1921531856060028 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.31507253646850586, + "learning_rate": 2.4784311169171818e-05, + "loss": 0.2205, + "step": 12598, + "teacher_loss": 0.21004696190357208 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.4715031385421753, + "learning_rate": 2.478258951392957e-05, + "loss": 0.2077, + "step": 12599, + "teacher_loss": 0.17837247252464294 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.49409180879592896, + "learning_rate": 2.4780867634401875e-05, + "loss": 0.2671, + "step": 12600, + "teacher_loss": 0.24185073375701904 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.2607385516166687, + "learning_rate": 2.477914553062821e-05, + "loss": 0.2311, + "step": 12601, + "teacher_loss": 0.22775794565677643 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.36936667561531067, + "learning_rate": 2.4777423202648052e-05, + "loss": 0.1558, + "step": 12602, + "teacher_loss": 0.13212105631828308 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.5796270370483398, + "learning_rate": 2.4775700650500897e-05, + "loss": 0.5298, + "step": 12603, + "teacher_loss": 0.5242799520492554 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.3918241858482361, + "learning_rate": 2.4773977874226236e-05, + "loss": 0.208, + "step": 12604, + "teacher_loss": 0.18760943412780762 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.20024292171001434, + "learning_rate": 2.4772254873863564e-05, + "loss": 0.1556, + "step": 12605, + "teacher_loss": 0.1506561040878296 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.40143290162086487, + "learning_rate": 2.4770531649452393e-05, + "loss": 0.2617, + "step": 12606, + "teacher_loss": 0.2462194263935089 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.11884262412786484, + "learning_rate": 2.4768808201032222e-05, + "loss": 0.1853, + "step": 12607, + "teacher_loss": 0.19266246259212494 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.42557787895202637, + "learning_rate": 2.4767084528642564e-05, + "loss": 0.2762, + "step": 12608, + "teacher_loss": 0.25965040922164917 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.6889470815658569, + "learning_rate": 2.4765360632322942e-05, + "loss": 0.5168, + "step": 12609, + "teacher_loss": 0.4976810812950134 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.47595643997192383, + "learning_rate": 2.476363651211288e-05, + "loss": 0.2868, + "step": 12610, + "teacher_loss": 0.2657453715801239 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.3744402527809143, + "learning_rate": 2.4761912168051903e-05, + "loss": 0.5431, + "step": 12611, + "teacher_loss": 0.5617977976799011 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.25561782717704773, + "learning_rate": 2.4760187600179555e-05, + "loss": 0.2317, + "step": 12612, + "teacher_loss": 0.2290431261062622 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.49452996253967285, + "learning_rate": 2.475846280853536e-05, + "loss": 0.2657, + "step": 12613, + "teacher_loss": 0.2402990460395813 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.13165296614170074, + "learning_rate": 2.475673779315887e-05, + "loss": 0.19, + "step": 12614, + "teacher_loss": 0.19649936258792877 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.4186062216758728, + "learning_rate": 2.475501255408964e-05, + "loss": 0.2468, + "step": 12615, + "teacher_loss": 0.22768718004226685 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.36524632573127747, + "learning_rate": 2.4753287091367218e-05, + "loss": 0.2488, + "step": 12616, + "teacher_loss": 0.23591215908527374 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.5232087969779968, + "learning_rate": 2.475156140503116e-05, + "loss": 0.7313, + "step": 12617, + "teacher_loss": 0.754426121711731 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.4180338680744171, + "learning_rate": 2.4749835495121048e-05, + "loss": 0.2435, + "step": 12618, + "teacher_loss": 0.22413134574890137 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.20741084218025208, + "learning_rate": 2.4748109361676427e-05, + "loss": 0.2252, + "step": 12619, + "teacher_loss": 0.22714349627494812 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.6563153862953186, + "learning_rate": 2.4746383004736894e-05, + "loss": 0.4134, + "step": 12620, + "teacher_loss": 0.38638395071029663 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.4777422547340393, + "learning_rate": 2.474465642434201e-05, + "loss": 0.2106, + "step": 12621, + "teacher_loss": 0.1809486448764801 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.37529456615448, + "learning_rate": 2.4742929620531373e-05, + "loss": 0.2244, + "step": 12622, + "teacher_loss": 0.2076016664505005 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.21721863746643066, + "learning_rate": 2.4741202593344575e-05, + "loss": 0.1945, + "step": 12623, + "teacher_loss": 0.19192591309547424 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.4583682417869568, + "learning_rate": 2.4739475342821204e-05, + "loss": 0.2591, + "step": 12624, + "teacher_loss": 0.23691397905349731 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.7615426182746887, + "learning_rate": 2.4737747869000863e-05, + "loss": 0.3782, + "step": 12625, + "teacher_loss": 0.335564523935318 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 1.04924476146698, + "learning_rate": 2.473602017192316e-05, + "loss": 0.3659, + "step": 12626, + "teacher_loss": 0.28997552394866943 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.19185186922550201, + "learning_rate": 2.4734292251627704e-05, + "loss": 0.2216, + "step": 12627, + "teacher_loss": 0.22491714358329773 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.18634033203125, + "learning_rate": 2.4732564108154116e-05, + "loss": 0.263, + "step": 12628, + "teacher_loss": 0.27149099111557007 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.9119465351104736, + "learning_rate": 2.4730835741542007e-05, + "loss": 0.2255, + "step": 12629, + "teacher_loss": 0.14918920397758484 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.23655898869037628, + "learning_rate": 2.472910715183101e-05, + "loss": 0.206, + "step": 12630, + "teacher_loss": 0.2026577889919281 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.7155637741088867, + "learning_rate": 2.472737833906076e-05, + "loss": 0.5321, + "step": 12631, + "teacher_loss": 0.51168292760849 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.832538366317749, + "learning_rate": 2.4725649303270887e-05, + "loss": 0.3579, + "step": 12632, + "teacher_loss": 0.30513080954551697 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.3827417492866516, + "learning_rate": 2.4723920044501028e-05, + "loss": 0.3668, + "step": 12633, + "teacher_loss": 0.3649759888648987 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.3006754517555237, + "learning_rate": 2.4722190562790843e-05, + "loss": 0.2357, + "step": 12634, + "teacher_loss": 0.22851170599460602 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.5746712684631348, + "learning_rate": 2.4720460858179974e-05, + "loss": 0.2796, + "step": 12635, + "teacher_loss": 0.24684450030326843 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.22356471419334412, + "learning_rate": 2.4718730930708083e-05, + "loss": 0.2389, + "step": 12636, + "teacher_loss": 0.24058832228183746 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.3699165880680084, + "learning_rate": 2.471700078041483e-05, + "loss": 0.2287, + "step": 12637, + "teacher_loss": 0.21297301352024078 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.5901683568954468, + "learning_rate": 2.471527040733988e-05, + "loss": 0.4873, + "step": 12638, + "teacher_loss": 0.47582483291625977 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.44878697395324707, + "learning_rate": 2.4713539811522907e-05, + "loss": 0.3, + "step": 12639, + "teacher_loss": 0.28348231315612793 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.7039507031440735, + "learning_rate": 2.471180899300359e-05, + "loss": 0.2676, + "step": 12640, + "teacher_loss": 0.2190612554550171 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.9100903868675232, + "learning_rate": 2.4710077951821615e-05, + "loss": 0.4385, + "step": 12641, + "teacher_loss": 0.38615643978118896 + }, + { + "compression_loss": 0.0, + "epoch": 2.28, + "label_loss": 0.1566615104675293, + "learning_rate": 2.4708346688016658e-05, + "loss": 0.1491, + "step": 12642, + "teacher_loss": 0.148284912109375 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.4914247989654541, + "learning_rate": 2.4706615201628423e-05, + "loss": 0.2217, + "step": 12643, + "teacher_loss": 0.19177308678627014 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.33896830677986145, + "learning_rate": 2.47048834926966e-05, + "loss": 0.2658, + "step": 12644, + "teacher_loss": 0.2576187252998352 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.2465217411518097, + "learning_rate": 2.47031515612609e-05, + "loss": 0.1478, + "step": 12645, + "teacher_loss": 0.1367756426334381 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.8291858434677124, + "learning_rate": 2.470141940736102e-05, + "loss": 0.4457, + "step": 12646, + "teacher_loss": 0.40305519104003906 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.632556676864624, + "learning_rate": 2.4699687031036684e-05, + "loss": 0.3488, + "step": 12647, + "teacher_loss": 0.31732529401779175 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.4063131511211395, + "learning_rate": 2.4697954432327602e-05, + "loss": 0.2055, + "step": 12648, + "teacher_loss": 0.18320028483867645 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.44793960452079773, + "learning_rate": 2.4696221611273503e-05, + "loss": 0.3355, + "step": 12649, + "teacher_loss": 0.3230014741420746 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.31597214937210083, + "learning_rate": 2.469448856791411e-05, + "loss": 0.209, + "step": 12650, + "teacher_loss": 0.1970748007297516 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.26016974449157715, + "learning_rate": 2.4692755302289168e-05, + "loss": 0.2629, + "step": 12651, + "teacher_loss": 0.2631891369819641 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.65497225522995, + "learning_rate": 2.46910218144384e-05, + "loss": 0.268, + "step": 12652, + "teacher_loss": 0.22499778866767883 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.40610772371292114, + "learning_rate": 2.4689288104401562e-05, + "loss": 0.3379, + "step": 12653, + "teacher_loss": 0.33034467697143555 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.12130075693130493, + "learning_rate": 2.4687554172218395e-05, + "loss": 0.1984, + "step": 12654, + "teacher_loss": 0.20696593821048737 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.2080228626728058, + "learning_rate": 2.4685820017928653e-05, + "loss": 0.2071, + "step": 12655, + "teacher_loss": 0.20703697204589844 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.5996205806732178, + "learning_rate": 2.46840856415721e-05, + "loss": 0.3033, + "step": 12656, + "teacher_loss": 0.27038830518722534 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.32981282472610474, + "learning_rate": 2.4682351043188498e-05, + "loss": 0.1987, + "step": 12657, + "teacher_loss": 0.1841558814048767 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.7262628078460693, + "learning_rate": 2.4680616222817617e-05, + "loss": 0.4945, + "step": 12658, + "teacher_loss": 0.46879836916923523 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.558749258518219, + "learning_rate": 2.4678881180499228e-05, + "loss": 0.4088, + "step": 12659, + "teacher_loss": 0.39215636253356934 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.45009884238243103, + "learning_rate": 2.467714591627311e-05, + "loss": 0.2467, + "step": 12660, + "teacher_loss": 0.22408342361450195 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.17860107123851776, + "learning_rate": 2.4675410430179053e-05, + "loss": 0.2351, + "step": 12661, + "teacher_loss": 0.24140053987503052 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.33198362588882446, + "learning_rate": 2.467367472225685e-05, + "loss": 0.2607, + "step": 12662, + "teacher_loss": 0.2528004050254822 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.4819888770580292, + "learning_rate": 2.4671938792546276e-05, + "loss": 0.6162, + "step": 12663, + "teacher_loss": 0.6310882568359375 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.3727151155471802, + "learning_rate": 2.467020264108715e-05, + "loss": 0.2742, + "step": 12664, + "teacher_loss": 0.2632971405982971 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 1.2337837219238281, + "learning_rate": 2.466846626791927e-05, + "loss": 0.5593, + "step": 12665, + "teacher_loss": 0.48441237211227417 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.23402002453804016, + "learning_rate": 2.4666729673082444e-05, + "loss": 0.2933, + "step": 12666, + "teacher_loss": 0.299884170293808 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.13813212513923645, + "learning_rate": 2.466499285661649e-05, + "loss": 0.1937, + "step": 12667, + "teacher_loss": 0.1998957097530365 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.5632753372192383, + "learning_rate": 2.466325581856123e-05, + "loss": 0.3686, + "step": 12668, + "teacher_loss": 0.3469313383102417 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.49237850308418274, + "learning_rate": 2.466151855895648e-05, + "loss": 0.295, + "step": 12669, + "teacher_loss": 0.27306854724884033 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.6426405310630798, + "learning_rate": 2.465978107784208e-05, + "loss": 0.2057, + "step": 12670, + "teacher_loss": 0.15719977021217346 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.4500117301940918, + "learning_rate": 2.465804337525786e-05, + "loss": 0.2699, + "step": 12671, + "teacher_loss": 0.24993078410625458 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.6109174489974976, + "learning_rate": 2.4656305451243664e-05, + "loss": 0.2414, + "step": 12672, + "teacher_loss": 0.20038671791553497 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.5285136699676514, + "learning_rate": 2.4654567305839334e-05, + "loss": 0.242, + "step": 12673, + "teacher_loss": 0.21015599370002747 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.4114764332771301, + "learning_rate": 2.465282893908472e-05, + "loss": 0.3416, + "step": 12674, + "teacher_loss": 0.33384984731674194 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.5111281871795654, + "learning_rate": 2.4651090351019684e-05, + "loss": 0.5183, + "step": 12675, + "teacher_loss": 0.5191189050674438 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.9804028272628784, + "learning_rate": 2.4649351541684076e-05, + "loss": 0.5613, + "step": 12676, + "teacher_loss": 0.5147008895874023 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.8732770085334778, + "learning_rate": 2.464761251111777e-05, + "loss": 0.6973, + "step": 12677, + "teacher_loss": 0.677788496017456 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.5299378633499146, + "learning_rate": 2.4645873259360635e-05, + "loss": 0.2369, + "step": 12678, + "teacher_loss": 0.20437392592430115 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.3381912112236023, + "learning_rate": 2.464413378645254e-05, + "loss": 0.337, + "step": 12679, + "teacher_loss": 0.3368957042694092 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.4966122508049011, + "learning_rate": 2.464239409243338e-05, + "loss": 0.2339, + "step": 12680, + "teacher_loss": 0.20476022362709045 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.2683950662612915, + "learning_rate": 2.4640654177343028e-05, + "loss": 0.2275, + "step": 12681, + "teacher_loss": 0.2229967713356018 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.3283715844154358, + "learning_rate": 2.4638914041221384e-05, + "loss": 0.3041, + "step": 12682, + "teacher_loss": 0.3014216125011444 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.5279498100280762, + "learning_rate": 2.4637173684108333e-05, + "loss": 0.3553, + "step": 12683, + "teacher_loss": 0.3360982835292816 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.20253853499889374, + "learning_rate": 2.4635433106043787e-05, + "loss": 0.2606, + "step": 12684, + "teacher_loss": 0.266999751329422 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.6865276098251343, + "learning_rate": 2.4633692307067654e-05, + "loss": 0.3736, + "step": 12685, + "teacher_loss": 0.3388213515281677 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.500142514705658, + "learning_rate": 2.4631951287219833e-05, + "loss": 0.2231, + "step": 12686, + "teacher_loss": 0.19230876863002777 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.4036378860473633, + "learning_rate": 2.4630210046540246e-05, + "loss": 0.26, + "step": 12687, + "teacher_loss": 0.24406495690345764 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.8144952058792114, + "learning_rate": 2.462846858506882e-05, + "loss": 0.3633, + "step": 12688, + "teacher_loss": 0.313166081905365 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.41987690329551697, + "learning_rate": 2.4626726902845477e-05, + "loss": 0.2733, + "step": 12689, + "teacher_loss": 0.25701674818992615 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.3125081956386566, + "learning_rate": 2.462498499991014e-05, + "loss": 0.1773, + "step": 12690, + "teacher_loss": 0.16227251291275024 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.16725917160511017, + "learning_rate": 2.4623242876302764e-05, + "loss": 0.1751, + "step": 12691, + "teacher_loss": 0.17591989040374756 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.3306730389595032, + "learning_rate": 2.462150053206328e-05, + "loss": 0.1988, + "step": 12692, + "teacher_loss": 0.1841772198677063 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.6826362609863281, + "learning_rate": 2.4619757967231632e-05, + "loss": 0.3099, + "step": 12693, + "teacher_loss": 0.26851630210876465 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.3717074394226074, + "learning_rate": 2.461801518184778e-05, + "loss": 0.2559, + "step": 12694, + "teacher_loss": 0.24304817616939545 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.7762868404388428, + "learning_rate": 2.461627217595167e-05, + "loss": 0.3276, + "step": 12695, + "teacher_loss": 0.277798056602478 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.501883864402771, + "learning_rate": 2.4614528949583273e-05, + "loss": 0.2252, + "step": 12696, + "teacher_loss": 0.19446077942848206 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.7247898578643799, + "learning_rate": 2.4612785502782554e-05, + "loss": 0.2129, + "step": 12697, + "teacher_loss": 0.1560727059841156 + }, + { + "compression_loss": 0.0, + "epoch": 2.29, + "label_loss": 0.6145369410514832, + "learning_rate": 2.4611041835589482e-05, + "loss": 0.3189, + "step": 12698, + "teacher_loss": 0.28607177734375 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.6533224582672119, + "learning_rate": 2.4609297948044035e-05, + "loss": 0.2296, + "step": 12699, + "teacher_loss": 0.18252351880073547 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.5860744714736938, + "learning_rate": 2.46075538401862e-05, + "loss": 0.2772, + "step": 12700, + "teacher_loss": 0.2428891509771347 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.32241320610046387, + "learning_rate": 2.4605809512055956e-05, + "loss": 0.1683, + "step": 12701, + "teacher_loss": 0.15118065476417542 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.4126654267311096, + "learning_rate": 2.4604064963693303e-05, + "loss": 0.3635, + "step": 12702, + "teacher_loss": 0.35798850655555725 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.49540072679519653, + "learning_rate": 2.4602320195138232e-05, + "loss": 0.3843, + "step": 12703, + "teacher_loss": 0.3719090223312378 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.3412644863128662, + "learning_rate": 2.4600575206430753e-05, + "loss": 0.3166, + "step": 12704, + "teacher_loss": 0.31381258368492126 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.6605757474899292, + "learning_rate": 2.4598829997610867e-05, + "loss": 0.2652, + "step": 12705, + "teacher_loss": 0.22126471996307373 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.41099756956100464, + "learning_rate": 2.4597084568718583e-05, + "loss": 0.2606, + "step": 12706, + "teacher_loss": 0.24391400814056396 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.4479926824569702, + "learning_rate": 2.459533891979393e-05, + "loss": 0.3547, + "step": 12707, + "teacher_loss": 0.3442840576171875 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.711146354675293, + "learning_rate": 2.4593593050876924e-05, + "loss": 0.3534, + "step": 12708, + "teacher_loss": 0.3136245012283325 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.36635780334472656, + "learning_rate": 2.4591846962007583e-05, + "loss": 0.2239, + "step": 12709, + "teacher_loss": 0.2080240696668625 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.5478852987289429, + "learning_rate": 2.4590100653225958e-05, + "loss": 0.2378, + "step": 12710, + "teacher_loss": 0.203330397605896 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.8523598909378052, + "learning_rate": 2.4588354124572076e-05, + "loss": 0.5593, + "step": 12711, + "teacher_loss": 0.5267688632011414 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.2741791307926178, + "learning_rate": 2.458660737608598e-05, + "loss": 0.2045, + "step": 12712, + "teacher_loss": 0.19675593078136444 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.14311210811138153, + "learning_rate": 2.458486040780772e-05, + "loss": 0.1528, + "step": 12713, + "teacher_loss": 0.1538282036781311 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.6791624426841736, + "learning_rate": 2.4583113219777346e-05, + "loss": 0.3526, + "step": 12714, + "teacher_loss": 0.316366970539093 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.3931400179862976, + "learning_rate": 2.4581365812034917e-05, + "loss": 0.204, + "step": 12715, + "teacher_loss": 0.18297532200813293 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.28954386711120605, + "learning_rate": 2.45796181846205e-05, + "loss": 0.2521, + "step": 12716, + "teacher_loss": 0.24791626632213593 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.2832193970680237, + "learning_rate": 2.4577870337574158e-05, + "loss": 0.2633, + "step": 12717, + "teacher_loss": 0.2611054480075836 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.41847631335258484, + "learning_rate": 2.457612227093596e-05, + "loss": 0.2211, + "step": 12718, + "teacher_loss": 0.1992010772228241 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.48299989104270935, + "learning_rate": 2.4574373984745996e-05, + "loss": 0.2396, + "step": 12719, + "teacher_loss": 0.21255764365196228 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.5592890977859497, + "learning_rate": 2.457262547904434e-05, + "loss": 0.3456, + "step": 12720, + "teacher_loss": 0.3218947649002075 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.3585044741630554, + "learning_rate": 2.4570876753871083e-05, + "loss": 0.3714, + "step": 12721, + "teacher_loss": 0.3728066086769104 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.4857742190361023, + "learning_rate": 2.4569127809266315e-05, + "loss": 0.2593, + "step": 12722, + "teacher_loss": 0.23412850499153137 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.3344912827014923, + "learning_rate": 2.4567378645270134e-05, + "loss": 0.4273, + "step": 12723, + "teacher_loss": 0.43757182359695435 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.7104486227035522, + "learning_rate": 2.456562926192265e-05, + "loss": 0.2779, + "step": 12724, + "teacher_loss": 0.22980816662311554 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.26461973786354065, + "learning_rate": 2.4563879659263964e-05, + "loss": 0.2036, + "step": 12725, + "teacher_loss": 0.19678860902786255 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.2016523778438568, + "learning_rate": 2.4562129837334192e-05, + "loss": 0.2153, + "step": 12726, + "teacher_loss": 0.2168487012386322 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.3421795070171356, + "learning_rate": 2.4560379796173452e-05, + "loss": 0.3832, + "step": 12727, + "teacher_loss": 0.38770729303359985 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.5051214694976807, + "learning_rate": 2.4558629535821863e-05, + "loss": 0.2918, + "step": 12728, + "teacher_loss": 0.26808029413223267 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.2910989820957184, + "learning_rate": 2.4556879056319557e-05, + "loss": 0.2564, + "step": 12729, + "teacher_loss": 0.252596914768219 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.2521718442440033, + "learning_rate": 2.455512835770667e-05, + "loss": 0.1889, + "step": 12730, + "teacher_loss": 0.18191729485988617 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.9507643580436707, + "learning_rate": 2.455337744002334e-05, + "loss": 0.4776, + "step": 12731, + "teacher_loss": 0.4250204563140869 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 1.057058334350586, + "learning_rate": 2.4551626303309703e-05, + "loss": 0.3406, + "step": 12732, + "teacher_loss": 0.2610388398170471 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.8102385997772217, + "learning_rate": 2.4549874947605915e-05, + "loss": 0.2968, + "step": 12733, + "teacher_loss": 0.23970946669578552 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.30617085099220276, + "learning_rate": 2.4548123372952126e-05, + "loss": 0.2129, + "step": 12734, + "teacher_loss": 0.20250718295574188 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.5361084342002869, + "learning_rate": 2.4546371579388496e-05, + "loss": 0.3743, + "step": 12735, + "teacher_loss": 0.35636162757873535 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.49368131160736084, + "learning_rate": 2.4544619566955185e-05, + "loss": 0.3168, + "step": 12736, + "teacher_loss": 0.29718470573425293 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.3797915279865265, + "learning_rate": 2.4542867335692362e-05, + "loss": 0.1948, + "step": 12737, + "teacher_loss": 0.17422893643379211 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.7465950846672058, + "learning_rate": 2.4541114885640207e-05, + "loss": 0.572, + "step": 12738, + "teacher_loss": 0.5526261329650879 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.2563946843147278, + "learning_rate": 2.4539362216838893e-05, + "loss": 0.2088, + "step": 12739, + "teacher_loss": 0.2034914195537567 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.5228832364082336, + "learning_rate": 2.4537609329328597e-05, + "loss": 0.2076, + "step": 12740, + "teacher_loss": 0.17252935469150543 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.667177677154541, + "learning_rate": 2.4535856223149525e-05, + "loss": 0.366, + "step": 12741, + "teacher_loss": 0.33253538608551025 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.5715077519416809, + "learning_rate": 2.453410289834185e-05, + "loss": 0.2295, + "step": 12742, + "teacher_loss": 0.19145861268043518 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.41520828008651733, + "learning_rate": 2.453234935494578e-05, + "loss": 0.2576, + "step": 12743, + "teacher_loss": 0.24007537961006165 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.5495928525924683, + "learning_rate": 2.4530595593001527e-05, + "loss": 0.3345, + "step": 12744, + "teacher_loss": 0.3105456233024597 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.7105432152748108, + "learning_rate": 2.4528841612549284e-05, + "loss": 0.4395, + "step": 12745, + "teacher_loss": 0.40933847427368164 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.2253831923007965, + "learning_rate": 2.452708741362928e-05, + "loss": 0.346, + "step": 12746, + "teacher_loss": 0.3594166040420532 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.696350634098053, + "learning_rate": 2.4525332996281716e-05, + "loss": 0.257, + "step": 12747, + "teacher_loss": 0.20816929638385773 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.2134370505809784, + "learning_rate": 2.4523578360546828e-05, + "loss": 0.3286, + "step": 12748, + "teacher_loss": 0.34137117862701416 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.5214452743530273, + "learning_rate": 2.4521823506464844e-05, + "loss": 0.3039, + "step": 12749, + "teacher_loss": 0.2797281742095947 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.5118767023086548, + "learning_rate": 2.4520068434075995e-05, + "loss": 0.3028, + "step": 12750, + "teacher_loss": 0.27952322363853455 + }, + { + "epoch": 2.3, + "eval_exact_match": 79.70671712393566, + "eval_f1": 86.9584835966808, + "step": 12750 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.8415837287902832, + "learning_rate": 2.4518313143420514e-05, + "loss": 0.3539, + "step": 12751, + "teacher_loss": 0.29970085620880127 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.3951093256473541, + "learning_rate": 2.4516557634538657e-05, + "loss": 0.2491, + "step": 12752, + "teacher_loss": 0.2329024374485016 + }, + { + "compression_loss": 0.0, + "epoch": 2.3, + "label_loss": 0.4233248829841614, + "learning_rate": 2.451480190747066e-05, + "loss": 0.179, + "step": 12753, + "teacher_loss": 0.1518113911151886 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.626502275466919, + "learning_rate": 2.4513045962256788e-05, + "loss": 0.2675, + "step": 12754, + "teacher_loss": 0.22757482528686523 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.46928292512893677, + "learning_rate": 2.4511289798937285e-05, + "loss": 0.3684, + "step": 12755, + "teacher_loss": 0.35724061727523804 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.48779958486557007, + "learning_rate": 2.450953341755243e-05, + "loss": 0.3318, + "step": 12756, + "teacher_loss": 0.3144574761390686 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.5141881704330444, + "learning_rate": 2.4507776818142483e-05, + "loss": 0.2522, + "step": 12757, + "teacher_loss": 0.2230379581451416 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.4260299801826477, + "learning_rate": 2.4506020000747717e-05, + "loss": 0.2063, + "step": 12758, + "teacher_loss": 0.181864395737648 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.41852647066116333, + "learning_rate": 2.4504262965408415e-05, + "loss": 0.2698, + "step": 12759, + "teacher_loss": 0.2532217502593994 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.2920951843261719, + "learning_rate": 2.4502505712164863e-05, + "loss": 0.2895, + "step": 12760, + "teacher_loss": 0.28918135166168213 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.3665492534637451, + "learning_rate": 2.450074824105734e-05, + "loss": 0.1975, + "step": 12761, + "teacher_loss": 0.17876750230789185 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.5575348734855652, + "learning_rate": 2.4498990552126144e-05, + "loss": 0.2254, + "step": 12762, + "teacher_loss": 0.18852347135543823 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.6259362697601318, + "learning_rate": 2.4497232645411575e-05, + "loss": 0.5717, + "step": 12763, + "teacher_loss": 0.5657031536102295 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.35016322135925293, + "learning_rate": 2.4495474520953938e-05, + "loss": 0.2699, + "step": 12764, + "teacher_loss": 0.26096174120903015 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.6094595193862915, + "learning_rate": 2.4493716178793537e-05, + "loss": 0.5837, + "step": 12765, + "teacher_loss": 0.5808587074279785 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.5083569288253784, + "learning_rate": 2.449195761897069e-05, + "loss": 0.2551, + "step": 12766, + "teacher_loss": 0.22692260146141052 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.4897061586380005, + "learning_rate": 2.4490198841525708e-05, + "loss": 0.2157, + "step": 12767, + "teacher_loss": 0.18526628613471985 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.18948040902614594, + "learning_rate": 2.4488439846498924e-05, + "loss": 0.2087, + "step": 12768, + "teacher_loss": 0.21088364720344543 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.47084784507751465, + "learning_rate": 2.4486680633930658e-05, + "loss": 0.279, + "step": 12769, + "teacher_loss": 0.25768622756004333 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.4271124005317688, + "learning_rate": 2.448492120386125e-05, + "loss": 0.2873, + "step": 12770, + "teacher_loss": 0.2717765271663666 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.5003107190132141, + "learning_rate": 2.448316155633104e-05, + "loss": 0.3744, + "step": 12771, + "teacher_loss": 0.3604600131511688 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.35192644596099854, + "learning_rate": 2.4481401691380362e-05, + "loss": 0.3499, + "step": 12772, + "teacher_loss": 0.3497084379196167 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.40583306550979614, + "learning_rate": 2.447964160904957e-05, + "loss": 0.2442, + "step": 12773, + "teacher_loss": 0.22625015676021576 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.4257432818412781, + "learning_rate": 2.4477881309379024e-05, + "loss": 0.2118, + "step": 12774, + "teacher_loss": 0.18802779912948608 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.7121524214744568, + "learning_rate": 2.447612079240907e-05, + "loss": 0.3702, + "step": 12775, + "teacher_loss": 0.33223316073417664 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.4624972343444824, + "learning_rate": 2.447436005818008e-05, + "loss": 0.2369, + "step": 12776, + "teacher_loss": 0.21184581518173218 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.34011220932006836, + "learning_rate": 2.4472599106732413e-05, + "loss": 0.2076, + "step": 12777, + "teacher_loss": 0.19285979866981506 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.5103950500488281, + "learning_rate": 2.4470837938106452e-05, + "loss": 0.2585, + "step": 12778, + "teacher_loss": 0.23056337237358093 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.4049499034881592, + "learning_rate": 2.4469076552342575e-05, + "loss": 0.2973, + "step": 12779, + "teacher_loss": 0.2852926552295685 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.3633689284324646, + "learning_rate": 2.4467314949481162e-05, + "loss": 0.2364, + "step": 12780, + "teacher_loss": 0.2223348617553711 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.6356157064437866, + "learning_rate": 2.44655531295626e-05, + "loss": 0.3848, + "step": 12781, + "teacher_loss": 0.356976181268692 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.50611811876297, + "learning_rate": 2.4463791092627284e-05, + "loss": 0.2772, + "step": 12782, + "teacher_loss": 0.2517325282096863 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.5781041383743286, + "learning_rate": 2.4462028838715613e-05, + "loss": 0.2358, + "step": 12783, + "teacher_loss": 0.1977129578590393 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.3778180480003357, + "learning_rate": 2.446026636786799e-05, + "loss": 0.201, + "step": 12784, + "teacher_loss": 0.18131864070892334 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.39219459891319275, + "learning_rate": 2.4458503680124818e-05, + "loss": 0.4406, + "step": 12785, + "teacher_loss": 0.4460013806819916 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.22727757692337036, + "learning_rate": 2.4456740775526523e-05, + "loss": 0.194, + "step": 12786, + "teacher_loss": 0.19034844636917114 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.4749402403831482, + "learning_rate": 2.4454977654113512e-05, + "loss": 0.2972, + "step": 12787, + "teacher_loss": 0.2774598002433777 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.6500797271728516, + "learning_rate": 2.4453214315926203e-05, + "loss": 0.5804, + "step": 12788, + "teacher_loss": 0.5726256370544434 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.17040009796619415, + "learning_rate": 2.445145076100504e-05, + "loss": 0.27, + "step": 12789, + "teacher_loss": 0.28105106949806213 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.6615462303161621, + "learning_rate": 2.4449686989390447e-05, + "loss": 0.2726, + "step": 12790, + "teacher_loss": 0.22943192720413208 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.7984317541122437, + "learning_rate": 2.444792300112286e-05, + "loss": 0.7198, + "step": 12791, + "teacher_loss": 0.7110674381256104 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.6699482202529907, + "learning_rate": 2.444615879624273e-05, + "loss": 0.3182, + "step": 12792, + "teacher_loss": 0.2791202664375305 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.3365625739097595, + "learning_rate": 2.4444394374790493e-05, + "loss": 0.2111, + "step": 12793, + "teacher_loss": 0.19713972508907318 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.6909478902816772, + "learning_rate": 2.4442629736806613e-05, + "loss": 0.239, + "step": 12794, + "teacher_loss": 0.18882140517234802 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.2484227865934372, + "learning_rate": 2.4440864882331544e-05, + "loss": 0.2787, + "step": 12795, + "teacher_loss": 0.28203296661376953 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.8718396425247192, + "learning_rate": 2.4439099811405747e-05, + "loss": 0.5807, + "step": 12796, + "teacher_loss": 0.5483235716819763 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.33335673809051514, + "learning_rate": 2.4437334524069693e-05, + "loss": 0.2481, + "step": 12797, + "teacher_loss": 0.23857778310775757 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.34471312165260315, + "learning_rate": 2.443556902036385e-05, + "loss": 0.2114, + "step": 12798, + "teacher_loss": 0.19653251767158508 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.5021098852157593, + "learning_rate": 2.44338033003287e-05, + "loss": 0.2408, + "step": 12799, + "teacher_loss": 0.21180084347724915 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.43334415555000305, + "learning_rate": 2.4432037364004725e-05, + "loss": 0.4199, + "step": 12800, + "teacher_loss": 0.4184553623199463 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.34729498624801636, + "learning_rate": 2.4430271211432413e-05, + "loss": 0.2348, + "step": 12801, + "teacher_loss": 0.22225135564804077 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.4381135404109955, + "learning_rate": 2.442850484265225e-05, + "loss": 0.2947, + "step": 12802, + "teacher_loss": 0.2787514626979828 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.5590059757232666, + "learning_rate": 2.4426738257704745e-05, + "loss": 0.2537, + "step": 12803, + "teacher_loss": 0.21981194615364075 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.1918146014213562, + "learning_rate": 2.4424971456630395e-05, + "loss": 0.2044, + "step": 12804, + "teacher_loss": 0.2057977318763733 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.5542712807655334, + "learning_rate": 2.4423204439469703e-05, + "loss": 0.3578, + "step": 12805, + "teacher_loss": 0.3360062539577484 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.38596487045288086, + "learning_rate": 2.442143720626319e-05, + "loss": 0.2367, + "step": 12806, + "teacher_loss": 0.2201429009437561 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.366477370262146, + "learning_rate": 2.441966975705136e-05, + "loss": 0.3205, + "step": 12807, + "teacher_loss": 0.315398633480072 + }, + { + "compression_loss": 0.0, + "epoch": 2.31, + "label_loss": 0.5178214311599731, + "learning_rate": 2.441790209187475e-05, + "loss": 0.4788, + "step": 12808, + "teacher_loss": 0.47442692518234253 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.1455693542957306, + "learning_rate": 2.4416134210773884e-05, + "loss": 0.2032, + "step": 12809, + "teacher_loss": 0.2095700055360794 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.4795738756656647, + "learning_rate": 2.441436611378929e-05, + "loss": 0.3943, + "step": 12810, + "teacher_loss": 0.38480839133262634 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.12746132910251617, + "learning_rate": 2.4412597800961506e-05, + "loss": 0.1341, + "step": 12811, + "teacher_loss": 0.13479305803775787 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.5107969045639038, + "learning_rate": 2.4410829272331073e-05, + "loss": 0.2647, + "step": 12812, + "teacher_loss": 0.23736470937728882 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.5077449083328247, + "learning_rate": 2.440906052793854e-05, + "loss": 0.234, + "step": 12813, + "teacher_loss": 0.20358508825302124 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.28745418787002563, + "learning_rate": 2.4407291567824462e-05, + "loss": 0.219, + "step": 12814, + "teacher_loss": 0.2114378660917282 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.756926417350769, + "learning_rate": 2.440552239202939e-05, + "loss": 0.3529, + "step": 12815, + "teacher_loss": 0.30806252360343933 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.2376304715871811, + "learning_rate": 2.4403753000593892e-05, + "loss": 0.1985, + "step": 12816, + "teacher_loss": 0.19418640434741974 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.35985973477363586, + "learning_rate": 2.440198339355853e-05, + "loss": 0.3249, + "step": 12817, + "teacher_loss": 0.32096678018569946 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.5735594034194946, + "learning_rate": 2.440021357096388e-05, + "loss": 0.2514, + "step": 12818, + "teacher_loss": 0.21555694937705994 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.40221551060676575, + "learning_rate": 2.4398443532850512e-05, + "loss": 0.2101, + "step": 12819, + "teacher_loss": 0.1887645721435547 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.5203587412834167, + "learning_rate": 2.439667327925901e-05, + "loss": 0.2435, + "step": 12820, + "teacher_loss": 0.21272984147071838 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.3704550266265869, + "learning_rate": 2.4394902810229967e-05, + "loss": 0.3303, + "step": 12821, + "teacher_loss": 0.32578545808792114 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.3017027676105499, + "learning_rate": 2.4393132125803965e-05, + "loss": 0.2913, + "step": 12822, + "teacher_loss": 0.290158748626709 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.46001148223876953, + "learning_rate": 2.4391361226021613e-05, + "loss": 0.3411, + "step": 12823, + "teacher_loss": 0.32784056663513184 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.7779672741889954, + "learning_rate": 2.4389590110923498e-05, + "loss": 0.2237, + "step": 12824, + "teacher_loss": 0.16211147606372833 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.8369835615158081, + "learning_rate": 2.4387818780550236e-05, + "loss": 0.3391, + "step": 12825, + "teacher_loss": 0.2838076949119568 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.5473663210868835, + "learning_rate": 2.438604723494244e-05, + "loss": 0.2696, + "step": 12826, + "teacher_loss": 0.23878192901611328 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.2935411334037781, + "learning_rate": 2.438427547414071e-05, + "loss": 0.1478, + "step": 12827, + "teacher_loss": 0.13162542879581451 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.5983997583389282, + "learning_rate": 2.438250349818569e-05, + "loss": 0.2661, + "step": 12828, + "teacher_loss": 0.2292214334011078 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.6560078263282776, + "learning_rate": 2.438073130711799e-05, + "loss": 0.293, + "step": 12829, + "teacher_loss": 0.2526160478591919 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.4568673372268677, + "learning_rate": 2.4378958900978246e-05, + "loss": 0.49, + "step": 12830, + "teacher_loss": 0.493631511926651 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.43570154905319214, + "learning_rate": 2.4377186279807098e-05, + "loss": 0.2768, + "step": 12831, + "teacher_loss": 0.2591586410999298 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.4226142168045044, + "learning_rate": 2.437541344364518e-05, + "loss": 0.2951, + "step": 12832, + "teacher_loss": 0.2809029519557953 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.6435673236846924, + "learning_rate": 2.4373640392533136e-05, + "loss": 0.234, + "step": 12833, + "teacher_loss": 0.18852336704730988 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.4074741005897522, + "learning_rate": 2.4371867126511627e-05, + "loss": 0.2719, + "step": 12834, + "teacher_loss": 0.2568800449371338 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.28506845235824585, + "learning_rate": 2.4370093645621306e-05, + "loss": 0.2323, + "step": 12835, + "teacher_loss": 0.22639970481395721 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.6510648727416992, + "learning_rate": 2.4368319949902826e-05, + "loss": 0.2479, + "step": 12836, + "teacher_loss": 0.20315083861351013 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.593468964099884, + "learning_rate": 2.4366546039396858e-05, + "loss": 0.3265, + "step": 12837, + "teacher_loss": 0.2967977523803711 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.553874671459198, + "learning_rate": 2.436477191414407e-05, + "loss": 0.3117, + "step": 12838, + "teacher_loss": 0.2848353385925293 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.8633517026901245, + "learning_rate": 2.436299757418514e-05, + "loss": 0.3394, + "step": 12839, + "teacher_loss": 0.2811833620071411 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.384809672832489, + "learning_rate": 2.4361223019560748e-05, + "loss": 0.2539, + "step": 12840, + "teacher_loss": 0.23934416472911835 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.3511003255844116, + "learning_rate": 2.435944825031158e-05, + "loss": 0.2505, + "step": 12841, + "teacher_loss": 0.23937579989433289 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.664929986000061, + "learning_rate": 2.4357673266478325e-05, + "loss": 0.3503, + "step": 12842, + "teacher_loss": 0.31534427404403687 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.4717535376548767, + "learning_rate": 2.4355898068101676e-05, + "loss": 0.3487, + "step": 12843, + "teacher_loss": 0.33506864309310913 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.47175133228302, + "learning_rate": 2.435412265522233e-05, + "loss": 0.2879, + "step": 12844, + "teacher_loss": 0.26751506328582764 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.5003417730331421, + "learning_rate": 2.4352347027881003e-05, + "loss": 0.495, + "step": 12845, + "teacher_loss": 0.49436646699905396 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.9326711893081665, + "learning_rate": 2.4350571186118398e-05, + "loss": 0.289, + "step": 12846, + "teacher_loss": 0.21743376553058624 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.6477702856063843, + "learning_rate": 2.4348795129975226e-05, + "loss": 0.2612, + "step": 12847, + "teacher_loss": 0.21828335523605347 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.7703279256820679, + "learning_rate": 2.4347018859492218e-05, + "loss": 0.3405, + "step": 12848, + "teacher_loss": 0.2926962077617645 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.29670923948287964, + "learning_rate": 2.4345242374710086e-05, + "loss": 0.1722, + "step": 12849, + "teacher_loss": 0.1583748459815979 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.32970547676086426, + "learning_rate": 2.4343465675669565e-05, + "loss": 0.2327, + "step": 12850, + "teacher_loss": 0.22187373042106628 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.37952157855033875, + "learning_rate": 2.434168876241139e-05, + "loss": 0.3122, + "step": 12851, + "teacher_loss": 0.30476221442222595 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.6123507022857666, + "learning_rate": 2.4339911634976298e-05, + "loss": 0.2425, + "step": 12852, + "teacher_loss": 0.20138241350650787 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.9636555910110474, + "learning_rate": 2.433813429340504e-05, + "loss": 0.4365, + "step": 12853, + "teacher_loss": 0.3779287338256836 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.3442821800708771, + "learning_rate": 2.4336356737738354e-05, + "loss": 0.2589, + "step": 12854, + "teacher_loss": 0.24940142035484314 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.5900931358337402, + "learning_rate": 2.4334578968017003e-05, + "loss": 0.2551, + "step": 12855, + "teacher_loss": 0.21786624193191528 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.5348082184791565, + "learning_rate": 2.4332800984281743e-05, + "loss": 0.2973, + "step": 12856, + "teacher_loss": 0.27087366580963135 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.4526482820510864, + "learning_rate": 2.4331022786573336e-05, + "loss": 0.2881, + "step": 12857, + "teacher_loss": 0.2698673605918884 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.9048672914505005, + "learning_rate": 2.4329244374932552e-05, + "loss": 0.3537, + "step": 12858, + "teacher_loss": 0.29248857498168945 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.6123314499855042, + "learning_rate": 2.4327465749400167e-05, + "loss": 0.292, + "step": 12859, + "teacher_loss": 0.25638720393180847 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.16980476677417755, + "learning_rate": 2.4325686910016957e-05, + "loss": 0.2306, + "step": 12860, + "teacher_loss": 0.23734743893146515 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.3036035895347595, + "learning_rate": 2.4323907856823705e-05, + "loss": 0.2395, + "step": 12861, + "teacher_loss": 0.2323692888021469 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.7467605471611023, + "learning_rate": 2.4322128589861204e-05, + "loss": 0.4394, + "step": 12862, + "teacher_loss": 0.40528684854507446 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.4392651319503784, + "learning_rate": 2.432034910917024e-05, + "loss": 0.2473, + "step": 12863, + "teacher_loss": 0.22596198320388794 + }, + { + "compression_loss": 0.0, + "epoch": 2.32, + "label_loss": 0.2262078821659088, + "learning_rate": 2.4318569414791614e-05, + "loss": 0.2982, + "step": 12864, + "teacher_loss": 0.3061489164829254 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.5485791563987732, + "learning_rate": 2.431678950676613e-05, + "loss": 0.4582, + "step": 12865, + "teacher_loss": 0.44810742139816284 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.4318222999572754, + "learning_rate": 2.4315009385134597e-05, + "loss": 0.3681, + "step": 12866, + "teacher_loss": 0.3609977960586548 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.3778802752494812, + "learning_rate": 2.4313229049937826e-05, + "loss": 0.2738, + "step": 12867, + "teacher_loss": 0.26223719120025635 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.32510340213775635, + "learning_rate": 2.4311448501216636e-05, + "loss": 0.2605, + "step": 12868, + "teacher_loss": 0.2533513903617859 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.6053298115730286, + "learning_rate": 2.430966773901185e-05, + "loss": 0.2939, + "step": 12869, + "teacher_loss": 0.2592756748199463 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.4363131821155548, + "learning_rate": 2.4307886763364293e-05, + "loss": 0.271, + "step": 12870, + "teacher_loss": 0.2526601254940033 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.6778519153594971, + "learning_rate": 2.4306105574314797e-05, + "loss": 0.3893, + "step": 12871, + "teacher_loss": 0.35726216435432434 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.5926095247268677, + "learning_rate": 2.4304324171904208e-05, + "loss": 0.292, + "step": 12872, + "teacher_loss": 0.25857964158058167 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.6763759851455688, + "learning_rate": 2.4302542556173358e-05, + "loss": 0.5491, + "step": 12873, + "teacher_loss": 0.5350015163421631 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.46671485900878906, + "learning_rate": 2.4300760727163094e-05, + "loss": 0.2788, + "step": 12874, + "teacher_loss": 0.25795796513557434 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.22786307334899902, + "learning_rate": 2.4298978684914275e-05, + "loss": 0.1963, + "step": 12875, + "teacher_loss": 0.19277650117874146 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.6583261489868164, + "learning_rate": 2.4297196429467755e-05, + "loss": 0.3046, + "step": 12876, + "teacher_loss": 0.2652910649776459 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.47161003947257996, + "learning_rate": 2.4295413960864397e-05, + "loss": 0.3226, + "step": 12877, + "teacher_loss": 0.3059903383255005 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 1.1870452165603638, + "learning_rate": 2.429363127914506e-05, + "loss": 1.0165, + "step": 12878, + "teacher_loss": 0.9975295066833496 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.43254750967025757, + "learning_rate": 2.4291848384350627e-05, + "loss": 0.3582, + "step": 12879, + "teacher_loss": 0.3499205708503723 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.4510774612426758, + "learning_rate": 2.4290065276521967e-05, + "loss": 0.2167, + "step": 12880, + "teacher_loss": 0.19068217277526855 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.19737738370895386, + "learning_rate": 2.4288281955699963e-05, + "loss": 0.2152, + "step": 12881, + "teacher_loss": 0.2171318233013153 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.4947117567062378, + "learning_rate": 2.42864984219255e-05, + "loss": 0.2662, + "step": 12882, + "teacher_loss": 0.24084588885307312 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.4688015580177307, + "learning_rate": 2.4284714675239476e-05, + "loss": 0.3399, + "step": 12883, + "teacher_loss": 0.325562447309494 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.7933242321014404, + "learning_rate": 2.428293071568278e-05, + "loss": 0.3408, + "step": 12884, + "teacher_loss": 0.2905702292919159 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.7716907858848572, + "learning_rate": 2.428114654329631e-05, + "loss": 0.286, + "step": 12885, + "teacher_loss": 0.23205523192882538 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.3969205617904663, + "learning_rate": 2.4279362158120976e-05, + "loss": 0.3129, + "step": 12886, + "teacher_loss": 0.3036167025566101 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.3845710754394531, + "learning_rate": 2.427757756019769e-05, + "loss": 0.43, + "step": 12887, + "teacher_loss": 0.43506473302841187 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.3012787401676178, + "learning_rate": 2.427579274956737e-05, + "loss": 0.1644, + "step": 12888, + "teacher_loss": 0.14919152855873108 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.5357356071472168, + "learning_rate": 2.427400772627093e-05, + "loss": 0.2243, + "step": 12889, + "teacher_loss": 0.1897270530462265 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.2902115285396576, + "learning_rate": 2.4272222490349296e-05, + "loss": 0.2089, + "step": 12890, + "teacher_loss": 0.19987821578979492 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.677614688873291, + "learning_rate": 2.4270437041843402e-05, + "loss": 0.5213, + "step": 12891, + "teacher_loss": 0.5039756298065186 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.342756986618042, + "learning_rate": 2.426865138079418e-05, + "loss": 0.1997, + "step": 12892, + "teacher_loss": 0.18376266956329346 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.3064303398132324, + "learning_rate": 2.4266865507242567e-05, + "loss": 0.2505, + "step": 12893, + "teacher_loss": 0.2442825436592102 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.6494128704071045, + "learning_rate": 2.426507942122952e-05, + "loss": 0.3287, + "step": 12894, + "teacher_loss": 0.29306161403656006 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.7751800417900085, + "learning_rate": 2.4263293122795975e-05, + "loss": 0.2899, + "step": 12895, + "teacher_loss": 0.2360188364982605 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.19788327813148499, + "learning_rate": 2.426150661198289e-05, + "loss": 0.2246, + "step": 12896, + "teacher_loss": 0.22758856415748596 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.5023260712623596, + "learning_rate": 2.425971988883123e-05, + "loss": 0.3587, + "step": 12897, + "teacher_loss": 0.3427194654941559 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.34856122732162476, + "learning_rate": 2.425793295338195e-05, + "loss": 0.2399, + "step": 12898, + "teacher_loss": 0.2278183251619339 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.3226652443408966, + "learning_rate": 2.4256145805676028e-05, + "loss": 0.2796, + "step": 12899, + "teacher_loss": 0.27479395270347595 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.4358535408973694, + "learning_rate": 2.425435844575443e-05, + "loss": 0.2926, + "step": 12900, + "teacher_loss": 0.27668309211730957 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.3327135741710663, + "learning_rate": 2.4252570873658147e-05, + "loss": 0.1885, + "step": 12901, + "teacher_loss": 0.1724540889263153 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.5379756093025208, + "learning_rate": 2.425078308942815e-05, + "loss": 0.2289, + "step": 12902, + "teacher_loss": 0.194538876414299 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.16775484383106232, + "learning_rate": 2.4248995093105432e-05, + "loss": 0.1744, + "step": 12903, + "teacher_loss": 0.17518025636672974 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.7153120040893555, + "learning_rate": 2.4247206884730982e-05, + "loss": 0.2966, + "step": 12904, + "teacher_loss": 0.250084787607193 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.5726916790008545, + "learning_rate": 2.4245418464345805e-05, + "loss": 0.2916, + "step": 12905, + "teacher_loss": 0.2603141963481903 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.4702540934085846, + "learning_rate": 2.4243629831990905e-05, + "loss": 0.1945, + "step": 12906, + "teacher_loss": 0.16389453411102295 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.10126000642776489, + "learning_rate": 2.4241840987707283e-05, + "loss": 0.2282, + "step": 12907, + "teacher_loss": 0.24225324392318726 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.3827143609523773, + "learning_rate": 2.424005193153596e-05, + "loss": 0.2177, + "step": 12908, + "teacher_loss": 0.19934983551502228 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 1.3696250915527344, + "learning_rate": 2.4238262663517944e-05, + "loss": 0.4843, + "step": 12909, + "teacher_loss": 0.3858788013458252 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.35199326276779175, + "learning_rate": 2.4236473183694267e-05, + "loss": 0.1789, + "step": 12910, + "teacher_loss": 0.15964165329933167 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.4294697940349579, + "learning_rate": 2.4234683492105953e-05, + "loss": 0.1948, + "step": 12911, + "teacher_loss": 0.16868704557418823 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.4898097515106201, + "learning_rate": 2.423289358879403e-05, + "loss": 0.428, + "step": 12912, + "teacher_loss": 0.4211471378803253 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.6356112957000732, + "learning_rate": 2.423110347379954e-05, + "loss": 0.277, + "step": 12913, + "teacher_loss": 0.2371285855770111 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.4554620385169983, + "learning_rate": 2.4229313147163522e-05, + "loss": 0.1964, + "step": 12914, + "teacher_loss": 0.16757169365882874 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.6927334666252136, + "learning_rate": 2.4227522608927027e-05, + "loss": 0.5312, + "step": 12915, + "teacher_loss": 0.5132949352264404 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.2921598255634308, + "learning_rate": 2.4225731859131104e-05, + "loss": 0.1969, + "step": 12916, + "teacher_loss": 0.18632224202156067 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.3445168435573578, + "learning_rate": 2.422394089781681e-05, + "loss": 0.2861, + "step": 12917, + "teacher_loss": 0.27960968017578125 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.44665610790252686, + "learning_rate": 2.4222149725025204e-05, + "loss": 0.2412, + "step": 12918, + "teacher_loss": 0.21832284331321716 + }, + { + "compression_loss": 0.0, + "epoch": 2.33, + "label_loss": 0.4019852578639984, + "learning_rate": 2.4220358340797355e-05, + "loss": 0.286, + "step": 12919, + "teacher_loss": 0.27310192584991455 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.8094888925552368, + "learning_rate": 2.4218566745174334e-05, + "loss": 0.3434, + "step": 12920, + "teacher_loss": 0.29155802726745605 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.22321835160255432, + "learning_rate": 2.4216774938197217e-05, + "loss": 0.2474, + "step": 12921, + "teacher_loss": 0.25004521012306213 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.10976250469684601, + "learning_rate": 2.4214982919907087e-05, + "loss": 0.1576, + "step": 12922, + "teacher_loss": 0.16286514699459076 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.5718361139297485, + "learning_rate": 2.4213190690345018e-05, + "loss": 0.2914, + "step": 12923, + "teacher_loss": 0.26018911600112915 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.12915055453777313, + "learning_rate": 2.4211398249552116e-05, + "loss": 0.2025, + "step": 12924, + "teacher_loss": 0.21062982082366943 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.4890318512916565, + "learning_rate": 2.4209605597569465e-05, + "loss": 0.3664, + "step": 12925, + "teacher_loss": 0.352780282497406 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.5125355124473572, + "learning_rate": 2.4207812734438167e-05, + "loss": 0.2228, + "step": 12926, + "teacher_loss": 0.19059734046459198 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.8767570853233337, + "learning_rate": 2.420601966019934e-05, + "loss": 0.2796, + "step": 12927, + "teacher_loss": 0.2132851928472519 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.3248770833015442, + "learning_rate": 2.4204226374894078e-05, + "loss": 0.264, + "step": 12928, + "teacher_loss": 0.2572064995765686 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.48605668544769287, + "learning_rate": 2.4202432878563493e-05, + "loss": 0.2587, + "step": 12929, + "teacher_loss": 0.23340681195259094 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.51018887758255, + "learning_rate": 2.4200639171248724e-05, + "loss": 0.2467, + "step": 12930, + "teacher_loss": 0.21747201681137085 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 1.2381446361541748, + "learning_rate": 2.419884525299088e-05, + "loss": 0.5333, + "step": 12931, + "teacher_loss": 0.45499977469444275 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.16283690929412842, + "learning_rate": 2.419705112383109e-05, + "loss": 0.3474, + "step": 12932, + "teacher_loss": 0.36786627769470215 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.6832075119018555, + "learning_rate": 2.4195256783810494e-05, + "loss": 0.2207, + "step": 12933, + "teacher_loss": 0.1692793220281601 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.1613585352897644, + "learning_rate": 2.4193462232970233e-05, + "loss": 0.1564, + "step": 12934, + "teacher_loss": 0.1558990776538849 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.3078269958496094, + "learning_rate": 2.4191667471351444e-05, + "loss": 0.2014, + "step": 12935, + "teacher_loss": 0.18962684273719788 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.8351870179176331, + "learning_rate": 2.4189872498995275e-05, + "loss": 0.3237, + "step": 12936, + "teacher_loss": 0.26690834760665894 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.1850230097770691, + "learning_rate": 2.4188077315942883e-05, + "loss": 0.171, + "step": 12937, + "teacher_loss": 0.16943730413913727 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.6224699020385742, + "learning_rate": 2.4186281922235433e-05, + "loss": 0.277, + "step": 12938, + "teacher_loss": 0.23863530158996582 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.5444072484970093, + "learning_rate": 2.418448631791407e-05, + "loss": 0.3496, + "step": 12939, + "teacher_loss": 0.32795074582099915 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.3170098662376404, + "learning_rate": 2.4182690503019976e-05, + "loss": 0.3244, + "step": 12940, + "teacher_loss": 0.3252352774143219 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.302931010723114, + "learning_rate": 2.418089447759432e-05, + "loss": 0.2019, + "step": 12941, + "teacher_loss": 0.19065412878990173 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.5815369486808777, + "learning_rate": 2.417909824167828e-05, + "loss": 0.2874, + "step": 12942, + "teacher_loss": 0.25477108359336853 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.32386094331741333, + "learning_rate": 2.417730179531304e-05, + "loss": 0.1849, + "step": 12943, + "teacher_loss": 0.16948944330215454 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.6512867212295532, + "learning_rate": 2.417550513853978e-05, + "loss": 0.2304, + "step": 12944, + "teacher_loss": 0.1836310774087906 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.2545059323310852, + "learning_rate": 2.41737082713997e-05, + "loss": 0.2309, + "step": 12945, + "teacher_loss": 0.2283049076795578 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.4940689206123352, + "learning_rate": 2.4171911193933995e-05, + "loss": 0.2268, + "step": 12946, + "teacher_loss": 0.1970774382352829 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.3047316074371338, + "learning_rate": 2.4170113906183863e-05, + "loss": 0.2181, + "step": 12947, + "teacher_loss": 0.20843788981437683 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.950525164604187, + "learning_rate": 2.4168316408190508e-05, + "loss": 0.2977, + "step": 12948, + "teacher_loss": 0.2251891791820526 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.4151240587234497, + "learning_rate": 2.4166518699995152e-05, + "loss": 0.2681, + "step": 12949, + "teacher_loss": 0.25171977281570435 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.263724148273468, + "learning_rate": 2.4164720781639002e-05, + "loss": 0.2371, + "step": 12950, + "teacher_loss": 0.23416763544082642 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.6097198128700256, + "learning_rate": 2.4162922653163283e-05, + "loss": 0.3844, + "step": 12951, + "teacher_loss": 0.35935860872268677 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.19677267968654633, + "learning_rate": 2.4161124314609216e-05, + "loss": 0.2537, + "step": 12952, + "teacher_loss": 0.2599792778491974 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.43097972869873047, + "learning_rate": 2.415932576601804e-05, + "loss": 0.2859, + "step": 12953, + "teacher_loss": 0.2697813808917999 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.4007374346256256, + "learning_rate": 2.4157527007430987e-05, + "loss": 0.2022, + "step": 12954, + "teacher_loss": 0.18011607229709625 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.4225662350654602, + "learning_rate": 2.4155728038889288e-05, + "loss": 0.297, + "step": 12955, + "teacher_loss": 0.2830048203468323 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.6245284676551819, + "learning_rate": 2.41539288604342e-05, + "loss": 0.3412, + "step": 12956, + "teacher_loss": 0.30976295471191406 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.7469122409820557, + "learning_rate": 2.4152129472106967e-05, + "loss": 0.2704, + "step": 12957, + "teacher_loss": 0.21744760870933533 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.5938356518745422, + "learning_rate": 2.4150329873948846e-05, + "loss": 0.2721, + "step": 12958, + "teacher_loss": 0.23637652397155762 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.5098090767860413, + "learning_rate": 2.414853006600109e-05, + "loss": 0.2569, + "step": 12959, + "teacher_loss": 0.22878050804138184 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.2532508969306946, + "learning_rate": 2.4146730048304974e-05, + "loss": 0.1694, + "step": 12960, + "teacher_loss": 0.16007313132286072 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.3584757447242737, + "learning_rate": 2.414492982090176e-05, + "loss": 0.2856, + "step": 12961, + "teacher_loss": 0.27745863795280457 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.3009755611419678, + "learning_rate": 2.414312938383272e-05, + "loss": 0.2786, + "step": 12962, + "teacher_loss": 0.2760612666606903 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.4707462787628174, + "learning_rate": 2.4141328737139142e-05, + "loss": 0.2504, + "step": 12963, + "teacher_loss": 0.22595767676830292 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.5244890451431274, + "learning_rate": 2.41395278808623e-05, + "loss": 0.2976, + "step": 12964, + "teacher_loss": 0.27240821719169617 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.09805616736412048, + "learning_rate": 2.4137726815043483e-05, + "loss": 0.1416, + "step": 12965, + "teacher_loss": 0.14643973112106323 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.5068673491477966, + "learning_rate": 2.413592553972399e-05, + "loss": 0.3147, + "step": 12966, + "teacher_loss": 0.29337289929389954 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.44178706407546997, + "learning_rate": 2.4134124054945115e-05, + "loss": 0.2537, + "step": 12967, + "teacher_loss": 0.23279644548892975 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.4431309700012207, + "learning_rate": 2.413232236074816e-05, + "loss": 0.2117, + "step": 12968, + "teacher_loss": 0.18601390719413757 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.5502498149871826, + "learning_rate": 2.4130520457174432e-05, + "loss": 0.2932, + "step": 12969, + "teacher_loss": 0.2646617293357849 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.593531608581543, + "learning_rate": 2.4128718344265246e-05, + "loss": 0.3587, + "step": 12970, + "teacher_loss": 0.3325750231742859 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.8571649193763733, + "learning_rate": 2.4126916022061916e-05, + "loss": 1.056, + "step": 12971, + "teacher_loss": 1.0780761241912842 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.7511194944381714, + "learning_rate": 2.412511349060577e-05, + "loss": 0.3463, + "step": 12972, + "teacher_loss": 0.301334410905838 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.1949847936630249, + "learning_rate": 2.4123310749938124e-05, + "loss": 0.23, + "step": 12973, + "teacher_loss": 0.2338375300168991 + }, + { + "compression_loss": 0.0, + "epoch": 2.34, + "label_loss": 0.42560848593711853, + "learning_rate": 2.412150780010032e-05, + "loss": 0.3273, + "step": 12974, + "teacher_loss": 0.31637537479400635 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.15780578553676605, + "learning_rate": 2.411970464113369e-05, + "loss": 0.2449, + "step": 12975, + "teacher_loss": 0.2545315623283386 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.4683195948600769, + "learning_rate": 2.4117901273079578e-05, + "loss": 0.294, + "step": 12976, + "teacher_loss": 0.2745956480503082 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.4335499703884125, + "learning_rate": 2.4116097695979315e-05, + "loss": 0.2497, + "step": 12977, + "teacher_loss": 0.2292264699935913 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.5725017786026001, + "learning_rate": 2.4114293909874276e-05, + "loss": 0.2751, + "step": 12978, + "teacher_loss": 0.2420164793729782 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.5932450890541077, + "learning_rate": 2.4112489914805798e-05, + "loss": 0.242, + "step": 12979, + "teacher_loss": 0.202985018491745 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.46854367852211, + "learning_rate": 2.4110685710815245e-05, + "loss": 0.3681, + "step": 12980, + "teacher_loss": 0.35695719718933105 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.5692731738090515, + "learning_rate": 2.4108881297943985e-05, + "loss": 0.2404, + "step": 12981, + "teacher_loss": 0.2038128525018692 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.29996636509895325, + "learning_rate": 2.4107076676233388e-05, + "loss": 0.231, + "step": 12982, + "teacher_loss": 0.22337926924228668 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.5239046216011047, + "learning_rate": 2.410527184572483e-05, + "loss": 0.3938, + "step": 12983, + "teacher_loss": 0.37928909063339233 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.14179030060768127, + "learning_rate": 2.410346680645968e-05, + "loss": 0.1952, + "step": 12984, + "teacher_loss": 0.20113706588745117 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.609269380569458, + "learning_rate": 2.4101661558479336e-05, + "loss": 0.3447, + "step": 12985, + "teacher_loss": 0.3152737617492676 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.43035614490509033, + "learning_rate": 2.4099856101825177e-05, + "loss": 0.2076, + "step": 12986, + "teacher_loss": 0.1828109323978424 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.5959734320640564, + "learning_rate": 2.4098050436538608e-05, + "loss": 0.2486, + "step": 12987, + "teacher_loss": 0.21003341674804688 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.6695563197135925, + "learning_rate": 2.4096244562661014e-05, + "loss": 0.3115, + "step": 12988, + "teacher_loss": 0.27170369029045105 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.6153783798217773, + "learning_rate": 2.40944384802338e-05, + "loss": 0.2904, + "step": 12989, + "teacher_loss": 0.2542846202850342 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.4398386478424072, + "learning_rate": 2.4092632189298384e-05, + "loss": 0.3017, + "step": 12990, + "teacher_loss": 0.28638529777526855 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.7592819929122925, + "learning_rate": 2.409082568989617e-05, + "loss": 0.6927, + "step": 12991, + "teacher_loss": 0.6853551864624023 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.36400753259658813, + "learning_rate": 2.408901898206858e-05, + "loss": 0.2877, + "step": 12992, + "teacher_loss": 0.2791703939437866 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.37116605043411255, + "learning_rate": 2.4087212065857037e-05, + "loss": 0.1809, + "step": 12993, + "teacher_loss": 0.15975654125213623 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.3625389337539673, + "learning_rate": 2.4085404941302963e-05, + "loss": 0.2835, + "step": 12994, + "teacher_loss": 0.27467402815818787 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.6715219020843506, + "learning_rate": 2.4083597608447797e-05, + "loss": 0.3734, + "step": 12995, + "teacher_loss": 0.34024500846862793 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.24331369996070862, + "learning_rate": 2.408179006733297e-05, + "loss": 0.2583, + "step": 12996, + "teacher_loss": 0.259914368391037 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.930972158908844, + "learning_rate": 2.4079982317999923e-05, + "loss": 0.4675, + "step": 12997, + "teacher_loss": 0.41603749990463257 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.4074627161026001, + "learning_rate": 2.4078174360490105e-05, + "loss": 0.3719, + "step": 12998, + "teacher_loss": 0.36791500449180603 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 1.1688551902770996, + "learning_rate": 2.4076366194844967e-05, + "loss": 0.2962, + "step": 12999, + "teacher_loss": 0.19921022653579712 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.2772231698036194, + "learning_rate": 2.4074557821105967e-05, + "loss": 0.3056, + "step": 13000, + "teacher_loss": 0.3087250292301178 + }, + { + "epoch": 2.35, + "eval_exact_match": 79.50804162724693, + "eval_f1": 87.11857570895603, + "step": 13000 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.48724859952926636, + "learning_rate": 2.4072749239314565e-05, + "loss": 0.2717, + "step": 13001, + "teacher_loss": 0.24779880046844482 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.42677101492881775, + "learning_rate": 2.407094044951222e-05, + "loss": 0.2201, + "step": 13002, + "teacher_loss": 0.19712623953819275 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.2668790817260742, + "learning_rate": 2.4069131451740405e-05, + "loss": 0.178, + "step": 13003, + "teacher_loss": 0.16812410950660706 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.5046723484992981, + "learning_rate": 2.4067322246040604e-05, + "loss": 0.2798, + "step": 13004, + "teacher_loss": 0.25482630729675293 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.4670093059539795, + "learning_rate": 2.4065512832454285e-05, + "loss": 0.2522, + "step": 13005, + "teacher_loss": 0.2283104658126831 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.4037174582481384, + "learning_rate": 2.4063703211022934e-05, + "loss": 0.3289, + "step": 13006, + "teacher_loss": 0.32054901123046875 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.8299764394760132, + "learning_rate": 2.4061893381788044e-05, + "loss": 0.3451, + "step": 13007, + "teacher_loss": 0.2912542223930359 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.6067514419555664, + "learning_rate": 2.406008334479111e-05, + "loss": 0.3132, + "step": 13008, + "teacher_loss": 0.28060683608055115 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.29773783683776855, + "learning_rate": 2.4058273100073625e-05, + "loss": 0.4186, + "step": 13009, + "teacher_loss": 0.4319990277290344 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.6178734302520752, + "learning_rate": 2.4056462647677098e-05, + "loss": 0.3844, + "step": 13010, + "teacher_loss": 0.3584526479244232 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.22649678587913513, + "learning_rate": 2.4054651987643037e-05, + "loss": 0.2315, + "step": 13011, + "teacher_loss": 0.232097327709198 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.4931163787841797, + "learning_rate": 2.405284112001295e-05, + "loss": 0.2842, + "step": 13012, + "teacher_loss": 0.2609565556049347 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.6831939816474915, + "learning_rate": 2.405103004482836e-05, + "loss": 0.259, + "step": 13013, + "teacher_loss": 0.21185873448848724 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.23586717247962952, + "learning_rate": 2.4049218762130782e-05, + "loss": 0.3572, + "step": 13014, + "teacher_loss": 0.3706822693347931 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.15566489100456238, + "learning_rate": 2.404740727196176e-05, + "loss": 0.2357, + "step": 13015, + "teacher_loss": 0.24456503987312317 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.6956691145896912, + "learning_rate": 2.4045595574362805e-05, + "loss": 0.3429, + "step": 13016, + "teacher_loss": 0.3036884665489197 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.5355004072189331, + "learning_rate": 2.404378366937547e-05, + "loss": 0.2581, + "step": 13017, + "teacher_loss": 0.22725075483322144 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.5629329681396484, + "learning_rate": 2.4041971557041282e-05, + "loss": 0.4853, + "step": 13018, + "teacher_loss": 0.4766855239868164 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.4025377035140991, + "learning_rate": 2.4040159237401802e-05, + "loss": 0.2525, + "step": 13019, + "teacher_loss": 0.23581308126449585 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.4799646735191345, + "learning_rate": 2.4038346710498574e-05, + "loss": 0.2959, + "step": 13020, + "teacher_loss": 0.2755019962787628 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.8231446146965027, + "learning_rate": 2.4036533976373153e-05, + "loss": 0.6856, + "step": 13021, + "teacher_loss": 0.6702879667282104 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.26017385721206665, + "learning_rate": 2.40347210350671e-05, + "loss": 0.2506, + "step": 13022, + "teacher_loss": 0.2495332956314087 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.35366058349609375, + "learning_rate": 2.4032907886621984e-05, + "loss": 0.3247, + "step": 13023, + "teacher_loss": 0.32142865657806396 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 1.2711718082427979, + "learning_rate": 2.403109453107937e-05, + "loss": 0.3749, + "step": 13024, + "teacher_loss": 0.27532148361206055 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.8148959875106812, + "learning_rate": 2.4029280968480832e-05, + "loss": 0.7794, + "step": 13025, + "teacher_loss": 0.7754602432250977 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.7226345539093018, + "learning_rate": 2.4027467198867957e-05, + "loss": 0.3409, + "step": 13026, + "teacher_loss": 0.2984299063682556 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.35176289081573486, + "learning_rate": 2.4025653222282324e-05, + "loss": 0.3125, + "step": 13027, + "teacher_loss": 0.30808767676353455 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.26792585849761963, + "learning_rate": 2.4023839038765525e-05, + "loss": 0.1785, + "step": 13028, + "teacher_loss": 0.16851915419101715 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.6042264699935913, + "learning_rate": 2.4022024648359147e-05, + "loss": 0.2457, + "step": 13029, + "teacher_loss": 0.2058224380016327 + }, + { + "compression_loss": 0.0, + "epoch": 2.35, + "label_loss": 0.24340839684009552, + "learning_rate": 2.4020210051104796e-05, + "loss": 0.1287, + "step": 13030, + "teacher_loss": 0.11591649055480957 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.3650391101837158, + "learning_rate": 2.4018395247044074e-05, + "loss": 0.3565, + "step": 13031, + "teacher_loss": 0.3555457592010498 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.5148910880088806, + "learning_rate": 2.4016580236218585e-05, + "loss": 0.2747, + "step": 13032, + "teacher_loss": 0.2479724884033203 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.4794608950614929, + "learning_rate": 2.4014765018669948e-05, + "loss": 0.2279, + "step": 13033, + "teacher_loss": 0.19998782873153687 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.49105241894721985, + "learning_rate": 2.4012949594439773e-05, + "loss": 0.3682, + "step": 13034, + "teacher_loss": 0.3545871078968048 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.3831844925880432, + "learning_rate": 2.4011133963569683e-05, + "loss": 0.2565, + "step": 13035, + "teacher_loss": 0.24241241812705994 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 1.0132230520248413, + "learning_rate": 2.400931812610131e-05, + "loss": 0.3664, + "step": 13036, + "teacher_loss": 0.2944791913032532 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.5572211146354675, + "learning_rate": 2.400750208207629e-05, + "loss": 0.2423, + "step": 13037, + "teacher_loss": 0.20729447901248932 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.6663508415222168, + "learning_rate": 2.4005685831536248e-05, + "loss": 0.2686, + "step": 13038, + "teacher_loss": 0.22439594566822052 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.783195972442627, + "learning_rate": 2.4003869374522832e-05, + "loss": 0.4137, + "step": 13039, + "teacher_loss": 0.37265104055404663 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.5105181932449341, + "learning_rate": 2.4002052711077685e-05, + "loss": 0.3055, + "step": 13040, + "teacher_loss": 0.2827637791633606 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.31425777077674866, + "learning_rate": 2.4000235841242457e-05, + "loss": 0.2017, + "step": 13041, + "teacher_loss": 0.18913927674293518 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.7799074649810791, + "learning_rate": 2.3998418765058813e-05, + "loss": 0.2922, + "step": 13042, + "teacher_loss": 0.23801207542419434 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.43896448612213135, + "learning_rate": 2.3996601482568395e-05, + "loss": 0.204, + "step": 13043, + "teacher_loss": 0.17783747613430023 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.6664102673530579, + "learning_rate": 2.399478399381288e-05, + "loss": 0.2434, + "step": 13044, + "teacher_loss": 0.1964050531387329 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.39133018255233765, + "learning_rate": 2.3992966298833945e-05, + "loss": 0.2961, + "step": 13045, + "teacher_loss": 0.2855234146118164 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.33472564816474915, + "learning_rate": 2.3991148397673247e-05, + "loss": 0.1984, + "step": 13046, + "teacher_loss": 0.1832292079925537 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.4228655993938446, + "learning_rate": 2.3989330290372476e-05, + "loss": 0.3104, + "step": 13047, + "teacher_loss": 0.29794585704803467 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.7621529698371887, + "learning_rate": 2.398751197697331e-05, + "loss": 0.2786, + "step": 13048, + "teacher_loss": 0.2248266637325287 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 1.0917127132415771, + "learning_rate": 2.3985693457517444e-05, + "loss": 0.3016, + "step": 13049, + "teacher_loss": 0.21382871270179749 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.7560780644416809, + "learning_rate": 2.3983874732046566e-05, + "loss": 0.3908, + "step": 13050, + "teacher_loss": 0.3501774072647095 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.45956382155418396, + "learning_rate": 2.3982055800602374e-05, + "loss": 0.319, + "step": 13051, + "teacher_loss": 0.3033749461174011 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.46796321868896484, + "learning_rate": 2.3980236663226574e-05, + "loss": 0.2386, + "step": 13052, + "teacher_loss": 0.21316882967948914 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.6254762411117554, + "learning_rate": 2.3978417319960872e-05, + "loss": 0.3448, + "step": 13053, + "teacher_loss": 0.31356537342071533 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.2411290407180786, + "learning_rate": 2.3976597770846977e-05, + "loss": 0.2698, + "step": 13054, + "teacher_loss": 0.2730352282524109 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.7285536527633667, + "learning_rate": 2.3974778015926607e-05, + "loss": 0.2679, + "step": 13055, + "teacher_loss": 0.21672102808952332 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.12649649381637573, + "learning_rate": 2.397295805524149e-05, + "loss": 0.1768, + "step": 13056, + "teacher_loss": 0.1823521852493286 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.1894121766090393, + "learning_rate": 2.397113788883334e-05, + "loss": 0.2567, + "step": 13057, + "teacher_loss": 0.26419055461883545 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.396454393863678, + "learning_rate": 2.3969317516743902e-05, + "loss": 0.3096, + "step": 13058, + "teacher_loss": 0.29998183250427246 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.3379182815551758, + "learning_rate": 2.3967496939014904e-05, + "loss": 0.329, + "step": 13059, + "teacher_loss": 0.3280338644981384 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.9625312089920044, + "learning_rate": 2.3965676155688085e-05, + "loss": 0.6018, + "step": 13060, + "teacher_loss": 0.5617706179618835 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.47420382499694824, + "learning_rate": 2.3963855166805195e-05, + "loss": 0.3892, + "step": 13061, + "teacher_loss": 0.37970829010009766 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.35196182131767273, + "learning_rate": 2.3962033972407978e-05, + "loss": 0.3564, + "step": 13062, + "teacher_loss": 0.35692551732063293 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.615990161895752, + "learning_rate": 2.3960212572538193e-05, + "loss": 0.2929, + "step": 13063, + "teacher_loss": 0.2569584250450134 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.2505914270877838, + "learning_rate": 2.39583909672376e-05, + "loss": 0.1605, + "step": 13064, + "teacher_loss": 0.1505323350429535 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.3963540196418762, + "learning_rate": 2.395656915654795e-05, + "loss": 0.2649, + "step": 13065, + "teacher_loss": 0.2503194212913513 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.38173213601112366, + "learning_rate": 2.3954747140511034e-05, + "loss": 0.4081, + "step": 13066, + "teacher_loss": 0.41103553771972656 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 1.1104040145874023, + "learning_rate": 2.395292491916861e-05, + "loss": 0.3491, + "step": 13067, + "teacher_loss": 0.2645367383956909 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.8333873152732849, + "learning_rate": 2.3951102492562456e-05, + "loss": 0.4994, + "step": 13068, + "teacher_loss": 0.46227729320526123 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.5347037315368652, + "learning_rate": 2.3949279860734366e-05, + "loss": 0.3395, + "step": 13069, + "teacher_loss": 0.31778132915496826 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.33279284834861755, + "learning_rate": 2.3947457023726118e-05, + "loss": 0.2469, + "step": 13070, + "teacher_loss": 0.23739996552467346 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.3597686290740967, + "learning_rate": 2.3945633981579506e-05, + "loss": 0.2124, + "step": 13071, + "teacher_loss": 0.19603756070137024 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.5787531733512878, + "learning_rate": 2.3943810734336325e-05, + "loss": 0.3539, + "step": 13072, + "teacher_loss": 0.3289181888103485 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.23367469012737274, + "learning_rate": 2.394198728203838e-05, + "loss": 0.2434, + "step": 13073, + "teacher_loss": 0.24449768662452698 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.46680039167404175, + "learning_rate": 2.3940163624727477e-05, + "loss": 0.2442, + "step": 13074, + "teacher_loss": 0.21946552395820618 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.1710911989212036, + "learning_rate": 2.3938339762445426e-05, + "loss": 0.1713, + "step": 13075, + "teacher_loss": 0.1713024079799652 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.5758000016212463, + "learning_rate": 2.3936515695234046e-05, + "loss": 0.3851, + "step": 13076, + "teacher_loss": 0.3638860285282135 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.357105553150177, + "learning_rate": 2.3934691423135145e-05, + "loss": 0.2604, + "step": 13077, + "teacher_loss": 0.24965639412403107 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.29043570160865784, + "learning_rate": 2.3932866946190567e-05, + "loss": 0.2982, + "step": 13078, + "teacher_loss": 0.2990264296531677 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.5501710176467896, + "learning_rate": 2.393104226444212e-05, + "loss": 0.3048, + "step": 13079, + "teacher_loss": 0.27749407291412354 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.9138692617416382, + "learning_rate": 2.3929217377931665e-05, + "loss": 0.5338, + "step": 13080, + "teacher_loss": 0.49158889055252075 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.3634622097015381, + "learning_rate": 2.392739228670102e-05, + "loss": 0.1321, + "step": 13081, + "teacher_loss": 0.1063578873872757 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.3361184298992157, + "learning_rate": 2.3925566990792033e-05, + "loss": 0.1603, + "step": 13082, + "teacher_loss": 0.14072290062904358 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.7473549842834473, + "learning_rate": 2.392374149024656e-05, + "loss": 0.3559, + "step": 13083, + "teacher_loss": 0.3123679757118225 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.3420478105545044, + "learning_rate": 2.3921915785106446e-05, + "loss": 0.2304, + "step": 13084, + "teacher_loss": 0.2179594188928604 + }, + { + "compression_loss": 0.0, + "epoch": 2.36, + "label_loss": 0.6225875020027161, + "learning_rate": 2.3920089875413553e-05, + "loss": 0.3771, + "step": 13085, + "teacher_loss": 0.34978365898132324 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.779904305934906, + "learning_rate": 2.3918263761209746e-05, + "loss": 0.2891, + "step": 13086, + "teacher_loss": 0.23457324504852295 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.3165227770805359, + "learning_rate": 2.3916437442536885e-05, + "loss": 0.1997, + "step": 13087, + "teacher_loss": 0.18668217957019806 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.4047742486000061, + "learning_rate": 2.3914610919436843e-05, + "loss": 0.2468, + "step": 13088, + "teacher_loss": 0.22930093109607697 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.2210788130760193, + "learning_rate": 2.3912784191951505e-05, + "loss": 0.2253, + "step": 13089, + "teacher_loss": 0.2258215993642807 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.71305251121521, + "learning_rate": 2.391095726012275e-05, + "loss": 0.4028, + "step": 13090, + "teacher_loss": 0.3682757019996643 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.585705041885376, + "learning_rate": 2.390913012399246e-05, + "loss": 0.3035, + "step": 13091, + "teacher_loss": 0.27212128043174744 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.31673091650009155, + "learning_rate": 2.3907302783602522e-05, + "loss": 0.2039, + "step": 13092, + "teacher_loss": 0.19141682982444763 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.5323687195777893, + "learning_rate": 2.3905475238994844e-05, + "loss": 0.2625, + "step": 13093, + "teacher_loss": 0.23250234127044678 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.2283971905708313, + "learning_rate": 2.3903647490211317e-05, + "loss": 0.1882, + "step": 13094, + "teacher_loss": 0.1836811751127243 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.46738266944885254, + "learning_rate": 2.3901819537293843e-05, + "loss": 0.2395, + "step": 13095, + "teacher_loss": 0.21416707336902618 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.2503347098827362, + "learning_rate": 2.389999138028434e-05, + "loss": 0.2066, + "step": 13096, + "teacher_loss": 0.20170539617538452 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.3184281289577484, + "learning_rate": 2.389816301922472e-05, + "loss": 0.1897, + "step": 13097, + "teacher_loss": 0.17538359761238098 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.3551815152168274, + "learning_rate": 2.3896334454156898e-05, + "loss": 0.298, + "step": 13098, + "teacher_loss": 0.2916863262653351 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.7322406768798828, + "learning_rate": 2.3894505685122796e-05, + "loss": 0.3185, + "step": 13099, + "teacher_loss": 0.2725127339363098 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.2698879837989807, + "learning_rate": 2.389267671216435e-05, + "loss": 0.22, + "step": 13100, + "teacher_loss": 0.2144922912120819 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.6521711349487305, + "learning_rate": 2.3890847535323485e-05, + "loss": 0.4658, + "step": 13101, + "teacher_loss": 0.4450523257255554 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.25021934509277344, + "learning_rate": 2.3889018154642145e-05, + "loss": 0.2571, + "step": 13102, + "teacher_loss": 0.2579100728034973 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.4916960895061493, + "learning_rate": 2.3887188570162266e-05, + "loss": 0.2333, + "step": 13103, + "teacher_loss": 0.2045513242483139 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.43899160623550415, + "learning_rate": 2.3885358781925802e-05, + "loss": 0.196, + "step": 13104, + "teacher_loss": 0.16897623240947723 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.5957446098327637, + "learning_rate": 2.3883528789974703e-05, + "loss": 0.4954, + "step": 13105, + "teacher_loss": 0.4842890501022339 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.32777661085128784, + "learning_rate": 2.388169859435092e-05, + "loss": 0.2274, + "step": 13106, + "teacher_loss": 0.21625575423240662 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.38102781772613525, + "learning_rate": 2.387986819509642e-05, + "loss": 0.1865, + "step": 13107, + "teacher_loss": 0.16483914852142334 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.24892719089984894, + "learning_rate": 2.387803759225316e-05, + "loss": 0.2052, + "step": 13108, + "teacher_loss": 0.20032528042793274 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.3413429260253906, + "learning_rate": 2.3876206785863114e-05, + "loss": 0.2351, + "step": 13109, + "teacher_loss": 0.2233349233865738 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.6450066566467285, + "learning_rate": 2.3874375775968263e-05, + "loss": 0.2654, + "step": 13110, + "teacher_loss": 0.22318929433822632 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.24692469835281372, + "learning_rate": 2.3872544562610586e-05, + "loss": 0.2872, + "step": 13111, + "teacher_loss": 0.29168492555618286 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.2794077694416046, + "learning_rate": 2.3870713145832057e-05, + "loss": 0.1885, + "step": 13112, + "teacher_loss": 0.1783529818058014 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 1.1267932653427124, + "learning_rate": 2.3868881525674674e-05, + "loss": 0.4188, + "step": 13113, + "teacher_loss": 0.34011781215667725 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.937947154045105, + "learning_rate": 2.3867049702180428e-05, + "loss": 0.434, + "step": 13114, + "teacher_loss": 0.37800148129463196 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.5105807781219482, + "learning_rate": 2.3865217675391315e-05, + "loss": 0.2486, + "step": 13115, + "teacher_loss": 0.21944299340248108 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.614952027797699, + "learning_rate": 2.3863385445349342e-05, + "loss": 0.2856, + "step": 13116, + "teacher_loss": 0.24900464713573456 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.446087121963501, + "learning_rate": 2.386155301209651e-05, + "loss": 0.3331, + "step": 13117, + "teacher_loss": 0.3205166459083557 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.6839063167572021, + "learning_rate": 2.3859720375674843e-05, + "loss": 0.3027, + "step": 13118, + "teacher_loss": 0.26030343770980835 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.25020039081573486, + "learning_rate": 2.385788753612635e-05, + "loss": 0.1681, + "step": 13119, + "teacher_loss": 0.1590082347393036 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.7375144362449646, + "learning_rate": 2.3856054493493048e-05, + "loss": 0.266, + "step": 13120, + "teacher_loss": 0.2136373370885849 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.5715293884277344, + "learning_rate": 2.385422124781697e-05, + "loss": 0.3339, + "step": 13121, + "teacher_loss": 0.30750131607055664 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.7466154098510742, + "learning_rate": 2.385238779914015e-05, + "loss": 0.2962, + "step": 13122, + "teacher_loss": 0.24614648520946503 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.22063696384429932, + "learning_rate": 2.3850554147504614e-05, + "loss": 0.1653, + "step": 13123, + "teacher_loss": 0.15916435420513153 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.4357588589191437, + "learning_rate": 2.384872029295241e-05, + "loss": 0.2787, + "step": 13124, + "teacher_loss": 0.2612892687320709 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.5342859625816345, + "learning_rate": 2.3846886235525573e-05, + "loss": 0.4384, + "step": 13125, + "teacher_loss": 0.4277440309524536 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.29129987955093384, + "learning_rate": 2.3845051975266162e-05, + "loss": 0.1558, + "step": 13126, + "teacher_loss": 0.14073993265628815 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.16417798399925232, + "learning_rate": 2.384321751221623e-05, + "loss": 0.222, + "step": 13127, + "teacher_loss": 0.2284564971923828 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.5167200565338135, + "learning_rate": 2.3841382846417835e-05, + "loss": 0.2678, + "step": 13128, + "teacher_loss": 0.2401035726070404 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.5882191061973572, + "learning_rate": 2.3839547977913036e-05, + "loss": 0.2696, + "step": 13129, + "teacher_loss": 0.23424233496189117 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 1.027330994606018, + "learning_rate": 2.3837712906743905e-05, + "loss": 0.4813, + "step": 13130, + "teacher_loss": 0.4206332862377167 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.26825690269470215, + "learning_rate": 2.3835877632952516e-05, + "loss": 0.2326, + "step": 13131, + "teacher_loss": 0.2286532074213028 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.30091434717178345, + "learning_rate": 2.383404215658094e-05, + "loss": 0.2452, + "step": 13132, + "teacher_loss": 0.23905707895755768 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.48598629236221313, + "learning_rate": 2.383220647767127e-05, + "loss": 0.3119, + "step": 13133, + "teacher_loss": 0.29258108139038086 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 1.1943050622940063, + "learning_rate": 2.3830370596265576e-05, + "loss": 0.384, + "step": 13134, + "teacher_loss": 0.2940070629119873 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.4006240963935852, + "learning_rate": 2.3828534512405968e-05, + "loss": 0.2519, + "step": 13135, + "teacher_loss": 0.2353377640247345 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.5557321310043335, + "learning_rate": 2.382669822613453e-05, + "loss": 0.5776, + "step": 13136, + "teacher_loss": 0.5800773501396179 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.2372012585401535, + "learning_rate": 2.3824861737493362e-05, + "loss": 0.2197, + "step": 13137, + "teacher_loss": 0.21771565079689026 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.5158334374427795, + "learning_rate": 2.3823025046524576e-05, + "loss": 0.2303, + "step": 13138, + "teacher_loss": 0.19856229424476624 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.4155901074409485, + "learning_rate": 2.382118815327028e-05, + "loss": 0.19, + "step": 13139, + "teacher_loss": 0.1649080216884613 + }, + { + "compression_loss": 0.0, + "epoch": 2.37, + "label_loss": 0.6908024549484253, + "learning_rate": 2.381935105777258e-05, + "loss": 0.3165, + "step": 13140, + "teacher_loss": 0.27495890855789185 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.42068690061569214, + "learning_rate": 2.381751376007361e-05, + "loss": 0.2223, + "step": 13141, + "teacher_loss": 0.2002897709608078 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.6808729767799377, + "learning_rate": 2.381567626021548e-05, + "loss": 0.3067, + "step": 13142, + "teacher_loss": 0.2651064693927765 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.30923861265182495, + "learning_rate": 2.381383855824033e-05, + "loss": 0.3385, + "step": 13143, + "teacher_loss": 0.3417533040046692 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.5256384015083313, + "learning_rate": 2.3812000654190286e-05, + "loss": 0.2833, + "step": 13144, + "teacher_loss": 0.25634124875068665 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.4820489287376404, + "learning_rate": 2.381016254810748e-05, + "loss": 0.3384, + "step": 13145, + "teacher_loss": 0.3224857449531555 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.4537546932697296, + "learning_rate": 2.3808324240034066e-05, + "loss": 0.2344, + "step": 13146, + "teacher_loss": 0.21003296971321106 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.30748042464256287, + "learning_rate": 2.380648573001219e-05, + "loss": 0.2476, + "step": 13147, + "teacher_loss": 0.2409113347530365 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.4503805637359619, + "learning_rate": 2.3804647018083992e-05, + "loss": 0.376, + "step": 13148, + "teacher_loss": 0.36771833896636963 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.6322369575500488, + "learning_rate": 2.380280810429164e-05, + "loss": 0.3802, + "step": 13149, + "teacher_loss": 0.3521774709224701 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.21972379088401794, + "learning_rate": 2.3800968988677287e-05, + "loss": 0.1823, + "step": 13150, + "teacher_loss": 0.17814847826957703 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.8011816740036011, + "learning_rate": 2.3799129671283104e-05, + "loss": 0.2327, + "step": 13151, + "teacher_loss": 0.1695370078086853 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 1.0780887603759766, + "learning_rate": 2.3797290152151257e-05, + "loss": 0.4058, + "step": 13152, + "teacher_loss": 0.33110371232032776 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.3133049011230469, + "learning_rate": 2.3795450431323925e-05, + "loss": 0.2541, + "step": 13153, + "teacher_loss": 0.24751552939414978 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.7453523278236389, + "learning_rate": 2.3793610508843286e-05, + "loss": 0.4024, + "step": 13154, + "teacher_loss": 0.3643397092819214 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.1672964245080948, + "learning_rate": 2.3791770384751516e-05, + "loss": 0.2, + "step": 13155, + "teacher_loss": 0.20362040400505066 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.22130584716796875, + "learning_rate": 2.3789930059090814e-05, + "loss": 0.1694, + "step": 13156, + "teacher_loss": 0.16361220180988312 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.4113225042819977, + "learning_rate": 2.3788089531903372e-05, + "loss": 0.2569, + "step": 13157, + "teacher_loss": 0.23979498445987701 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.3953251242637634, + "learning_rate": 2.3786248803231383e-05, + "loss": 0.2404, + "step": 13158, + "teacher_loss": 0.22322587668895721 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.5500034689903259, + "learning_rate": 2.378440787311705e-05, + "loss": 0.2831, + "step": 13159, + "teacher_loss": 0.2533941864967346 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.23266354203224182, + "learning_rate": 2.3782566741602585e-05, + "loss": 0.2217, + "step": 13160, + "teacher_loss": 0.220467209815979 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.3935345411300659, + "learning_rate": 2.378072540873019e-05, + "loss": 0.2525, + "step": 13161, + "teacher_loss": 0.23687484860420227 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.3831459879875183, + "learning_rate": 2.3778883874542098e-05, + "loss": 0.2121, + "step": 13162, + "teacher_loss": 0.1931067705154419 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.24893715977668762, + "learning_rate": 2.377704213908051e-05, + "loss": 0.2141, + "step": 13163, + "teacher_loss": 0.2101958841085434 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.4469989538192749, + "learning_rate": 2.3775200202387663e-05, + "loss": 0.2188, + "step": 13164, + "teacher_loss": 0.1934095174074173 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.46684351563453674, + "learning_rate": 2.3773358064505784e-05, + "loss": 0.2413, + "step": 13165, + "teacher_loss": 0.21619799733161926 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.28017908334732056, + "learning_rate": 2.377151572547711e-05, + "loss": 0.1762, + "step": 13166, + "teacher_loss": 0.16465477645397186 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.5015286803245544, + "learning_rate": 2.3769673185343877e-05, + "loss": 0.3183, + "step": 13167, + "teacher_loss": 0.2979453504085541 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.23387598991394043, + "learning_rate": 2.3767830444148335e-05, + "loss": 0.1898, + "step": 13168, + "teacher_loss": 0.1848611682653427 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.5822409391403198, + "learning_rate": 2.3765987501932724e-05, + "loss": 0.3214, + "step": 13169, + "teacher_loss": 0.2923853397369385 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.5652356147766113, + "learning_rate": 2.3764144358739302e-05, + "loss": 0.3594, + "step": 13170, + "teacher_loss": 0.3365795314311981 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.7127039432525635, + "learning_rate": 2.3762301014610326e-05, + "loss": 0.3148, + "step": 13171, + "teacher_loss": 0.27053916454315186 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.5804683566093445, + "learning_rate": 2.376045746958806e-05, + "loss": 0.304, + "step": 13172, + "teacher_loss": 0.27332770824432373 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.22764703631401062, + "learning_rate": 2.375861372371477e-05, + "loss": 0.1507, + "step": 13173, + "teacher_loss": 0.1422058343887329 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.4561974108219147, + "learning_rate": 2.3756769777032726e-05, + "loss": 0.1991, + "step": 13174, + "teacher_loss": 0.17057496309280396 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.30623212456703186, + "learning_rate": 2.3754925629584204e-05, + "loss": 0.169, + "step": 13175, + "teacher_loss": 0.15378184616565704 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.2700462341308594, + "learning_rate": 2.3753081281411483e-05, + "loss": 0.2424, + "step": 13176, + "teacher_loss": 0.23936600983142853 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.23005768656730652, + "learning_rate": 2.3751236732556857e-05, + "loss": 0.3319, + "step": 13177, + "teacher_loss": 0.3431857228279114 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.5549268126487732, + "learning_rate": 2.374939198306261e-05, + "loss": 0.2093, + "step": 13178, + "teacher_loss": 0.17088985443115234 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.23624272644519806, + "learning_rate": 2.3747547032971036e-05, + "loss": 0.2157, + "step": 13179, + "teacher_loss": 0.21345439553260803 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.27924948930740356, + "learning_rate": 2.374570188232443e-05, + "loss": 0.2226, + "step": 13180, + "teacher_loss": 0.21627815067768097 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.43899205327033997, + "learning_rate": 2.374385653116511e-05, + "loss": 0.2046, + "step": 13181, + "teacher_loss": 0.1785382330417633 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.48236730694770813, + "learning_rate": 2.3742010979535366e-05, + "loss": 0.3228, + "step": 13182, + "teacher_loss": 0.3050217628479004 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.5388857126235962, + "learning_rate": 2.3740165227477523e-05, + "loss": 0.431, + "step": 13183, + "teacher_loss": 0.41895946860313416 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.6362184882164001, + "learning_rate": 2.3738319275033903e-05, + "loss": 0.3217, + "step": 13184, + "teacher_loss": 0.2868010997772217 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.2992188036441803, + "learning_rate": 2.3736473122246812e-05, + "loss": 0.2017, + "step": 13185, + "teacher_loss": 0.19082872569561005 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.6334612965583801, + "learning_rate": 2.3734626769158587e-05, + "loss": 0.2914, + "step": 13186, + "teacher_loss": 0.2533993422985077 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.5217605829238892, + "learning_rate": 2.3732780215811563e-05, + "loss": 0.3696, + "step": 13187, + "teacher_loss": 0.3526615500450134 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.3592368960380554, + "learning_rate": 2.3730933462248065e-05, + "loss": 0.2565, + "step": 13188, + "teacher_loss": 0.24505166709423065 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.32157981395721436, + "learning_rate": 2.372908650851044e-05, + "loss": 0.1859, + "step": 13189, + "teacher_loss": 0.1707925796508789 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.6584714651107788, + "learning_rate": 2.372723935464104e-05, + "loss": 0.379, + "step": 13190, + "teacher_loss": 0.3479844331741333 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.35353463888168335, + "learning_rate": 2.37253920006822e-05, + "loss": 0.3739, + "step": 13191, + "teacher_loss": 0.3761264681816101 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.5478827357292175, + "learning_rate": 2.3723544446676283e-05, + "loss": 0.2573, + "step": 13192, + "teacher_loss": 0.22501471638679504 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.14495782554149628, + "learning_rate": 2.3721696692665644e-05, + "loss": 0.1674, + "step": 13193, + "teacher_loss": 0.16993612051010132 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.18254454433918, + "learning_rate": 2.3719848738692653e-05, + "loss": 0.2128, + "step": 13194, + "teacher_loss": 0.21612313389778137 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.8071692585945129, + "learning_rate": 2.371800058479967e-05, + "loss": 0.2532, + "step": 13195, + "teacher_loss": 0.19159573316574097 + }, + { + "compression_loss": 0.0, + "epoch": 2.38, + "label_loss": 0.5467922687530518, + "learning_rate": 2.3716152231029077e-05, + "loss": 0.2462, + "step": 13196, + "teacher_loss": 0.21282029151916504 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.3530251085758209, + "learning_rate": 2.3714303677423242e-05, + "loss": 0.2971, + "step": 13197, + "teacher_loss": 0.2909325063228607 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.23572660982608795, + "learning_rate": 2.371245492402455e-05, + "loss": 0.1372, + "step": 13198, + "teacher_loss": 0.12627235054969788 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.533949613571167, + "learning_rate": 2.3710605970875388e-05, + "loss": 0.3334, + "step": 13199, + "teacher_loss": 0.31114646792411804 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.7894918322563171, + "learning_rate": 2.3708756818018147e-05, + "loss": 0.3494, + "step": 13200, + "teacher_loss": 0.30044567584991455 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.47003427147865295, + "learning_rate": 2.370690746549522e-05, + "loss": 0.2602, + "step": 13201, + "teacher_loss": 0.23685386776924133 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.5281930565834045, + "learning_rate": 2.3705057913349017e-05, + "loss": 0.3049, + "step": 13202, + "teacher_loss": 0.2801341414451599 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.41883549094200134, + "learning_rate": 2.3703208161621932e-05, + "loss": 0.1874, + "step": 13203, + "teacher_loss": 0.16173753142356873 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.28319305181503296, + "learning_rate": 2.3701358210356375e-05, + "loss": 0.2921, + "step": 13204, + "teacher_loss": 0.2930489182472229 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.3948586583137512, + "learning_rate": 2.3699508059594762e-05, + "loss": 0.276, + "step": 13205, + "teacher_loss": 0.26273927092552185 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 1.120369791984558, + "learning_rate": 2.3697657709379513e-05, + "loss": 0.3739, + "step": 13206, + "teacher_loss": 0.2910025119781494 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 1.2317451238632202, + "learning_rate": 2.369580715975305e-05, + "loss": 0.4627, + "step": 13207, + "teacher_loss": 0.37722089886665344 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.18874123692512512, + "learning_rate": 2.36939564107578e-05, + "loss": 0.1608, + "step": 13208, + "teacher_loss": 0.1576671600341797 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.5665850639343262, + "learning_rate": 2.3692105462436198e-05, + "loss": 0.3509, + "step": 13209, + "teacher_loss": 0.3269417881965637 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.46877068281173706, + "learning_rate": 2.3690254314830674e-05, + "loss": 0.2421, + "step": 13210, + "teacher_loss": 0.2169049084186554 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.18176183104515076, + "learning_rate": 2.3688402967983672e-05, + "loss": 0.203, + "step": 13211, + "teacher_loss": 0.2054072767496109 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.5467663407325745, + "learning_rate": 2.3686551421937642e-05, + "loss": 0.2183, + "step": 13212, + "teacher_loss": 0.1817675083875656 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.4091041088104248, + "learning_rate": 2.3684699676735033e-05, + "loss": 0.2503, + "step": 13213, + "teacher_loss": 0.23265963792800903 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.2440621256828308, + "learning_rate": 2.3682847732418295e-05, + "loss": 0.2594, + "step": 13214, + "teacher_loss": 0.26114779710769653 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.29172462224960327, + "learning_rate": 2.3680995589029895e-05, + "loss": 0.1875, + "step": 13215, + "teacher_loss": 0.1759473830461502 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.5511808395385742, + "learning_rate": 2.367914324661229e-05, + "loss": 0.2796, + "step": 13216, + "teacher_loss": 0.24937835335731506 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.2184123694896698, + "learning_rate": 2.367729070520795e-05, + "loss": 0.2604, + "step": 13217, + "teacher_loss": 0.2650441527366638 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.7929906845092773, + "learning_rate": 2.3675437964859355e-05, + "loss": 0.4554, + "step": 13218, + "teacher_loss": 0.4179278016090393 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 1.0236337184906006, + "learning_rate": 2.3673585025608976e-05, + "loss": 0.4708, + "step": 13219, + "teacher_loss": 0.4093579649925232 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.4887635409832001, + "learning_rate": 2.3671731887499296e-05, + "loss": 0.2945, + "step": 13220, + "teacher_loss": 0.27289286255836487 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.46457576751708984, + "learning_rate": 2.3669878550572803e-05, + "loss": 0.322, + "step": 13221, + "teacher_loss": 0.3061993718147278 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.2688802480697632, + "learning_rate": 2.3668025014871987e-05, + "loss": 0.2852, + "step": 13222, + "teacher_loss": 0.28706392645835876 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.4268842935562134, + "learning_rate": 2.366617128043935e-05, + "loss": 0.1877, + "step": 13223, + "teacher_loss": 0.16117416322231293 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.3449594974517822, + "learning_rate": 2.3664317347317382e-05, + "loss": 0.3286, + "step": 13224, + "teacher_loss": 0.32683229446411133 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.5010718107223511, + "learning_rate": 2.3662463215548602e-05, + "loss": 0.2413, + "step": 13225, + "teacher_loss": 0.21245194971561432 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.3536940813064575, + "learning_rate": 2.3660608885175505e-05, + "loss": 0.1708, + "step": 13226, + "teacher_loss": 0.15052807331085205 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.6104978919029236, + "learning_rate": 2.3658754356240613e-05, + "loss": 0.3089, + "step": 13227, + "teacher_loss": 0.27539628744125366 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.682197093963623, + "learning_rate": 2.365689962878645e-05, + "loss": 0.2431, + "step": 13228, + "teacher_loss": 0.19426822662353516 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.8661444187164307, + "learning_rate": 2.3655044702855526e-05, + "loss": 0.2865, + "step": 13229, + "teacher_loss": 0.22214969992637634 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.6998417973518372, + "learning_rate": 2.365318957849038e-05, + "loss": 0.3742, + "step": 13230, + "teacher_loss": 0.33803316950798035 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.6644032001495361, + "learning_rate": 2.365133425573354e-05, + "loss": 0.6048, + "step": 13231, + "teacher_loss": 0.5982170104980469 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.5087882280349731, + "learning_rate": 2.3649478734627543e-05, + "loss": 0.3571, + "step": 13232, + "teacher_loss": 0.34019148349761963 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.12305235117673874, + "learning_rate": 2.364762301521493e-05, + "loss": 0.1624, + "step": 13233, + "teacher_loss": 0.1667397916316986 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.5764172077178955, + "learning_rate": 2.3645767097538252e-05, + "loss": 0.3396, + "step": 13234, + "teacher_loss": 0.31329360604286194 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.11184930801391602, + "learning_rate": 2.3643910981640052e-05, + "loss": 0.2183, + "step": 13235, + "teacher_loss": 0.23007959127426147 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.5248562097549438, + "learning_rate": 2.364205466756289e-05, + "loss": 0.233, + "step": 13236, + "teacher_loss": 0.20059174299240112 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.5020205974578857, + "learning_rate": 2.3640198155349324e-05, + "loss": 0.3308, + "step": 13237, + "teacher_loss": 0.3118058443069458 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.5215932130813599, + "learning_rate": 2.363834144504192e-05, + "loss": 0.2583, + "step": 13238, + "teacher_loss": 0.22909843921661377 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.7117658853530884, + "learning_rate": 2.3636484536683246e-05, + "loss": 0.3941, + "step": 13239, + "teacher_loss": 0.35885149240493774 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.7666230797767639, + "learning_rate": 2.3634627430315874e-05, + "loss": 0.5013, + "step": 13240, + "teacher_loss": 0.4717782735824585 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.8392297029495239, + "learning_rate": 2.363277012598238e-05, + "loss": 0.3495, + "step": 13241, + "teacher_loss": 0.295083224773407 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.3412531018257141, + "learning_rate": 2.363091262372536e-05, + "loss": 0.2044, + "step": 13242, + "teacher_loss": 0.18923211097717285 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.7438136339187622, + "learning_rate": 2.362905492358738e-05, + "loss": 0.3879, + "step": 13243, + "teacher_loss": 0.3483033776283264 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.21026575565338135, + "learning_rate": 2.3627197025611046e-05, + "loss": 0.2205, + "step": 13244, + "teacher_loss": 0.22163403034210205 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.3444966673851013, + "learning_rate": 2.3625338929838952e-05, + "loss": 0.2076, + "step": 13245, + "teacher_loss": 0.19241565465927124 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.3984840214252472, + "learning_rate": 2.362348063631369e-05, + "loss": 0.2926, + "step": 13246, + "teacher_loss": 0.28083133697509766 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.5067142844200134, + "learning_rate": 2.362162214507788e-05, + "loss": 0.3486, + "step": 13247, + "teacher_loss": 0.33100634813308716 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.3130027949810028, + "learning_rate": 2.3619763456174116e-05, + "loss": 0.246, + "step": 13248, + "teacher_loss": 0.2385254055261612 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.47818103432655334, + "learning_rate": 2.361790456964502e-05, + "loss": 0.368, + "step": 13249, + "teacher_loss": 0.35578304529190063 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.5812355279922485, + "learning_rate": 2.361604548553321e-05, + "loss": 0.3013, + "step": 13250, + "teacher_loss": 0.2701554596424103 + }, + { + "epoch": 2.39, + "eval_exact_match": 79.60264900662251, + "eval_f1": 87.21485608055386, + "step": 13250 + }, + { + "compression_loss": 0.0, + "epoch": 2.39, + "label_loss": 0.5362725257873535, + "learning_rate": 2.361418620388131e-05, + "loss": 0.2659, + "step": 13251, + "teacher_loss": 0.23582231998443604 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.6154779195785522, + "learning_rate": 2.361232672473195e-05, + "loss": 0.2692, + "step": 13252, + "teacher_loss": 0.23071923851966858 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.30691590905189514, + "learning_rate": 2.3610467048127758e-05, + "loss": 0.225, + "step": 13253, + "teacher_loss": 0.21593570709228516 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.39117273688316345, + "learning_rate": 2.3608607174111375e-05, + "loss": 0.3015, + "step": 13254, + "teacher_loss": 0.2915579378604889 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.5042406320571899, + "learning_rate": 2.3606747102725433e-05, + "loss": 0.3296, + "step": 13255, + "teacher_loss": 0.3101460933685303 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.3229520320892334, + "learning_rate": 2.3604886834012587e-05, + "loss": 0.2132, + "step": 13256, + "teacher_loss": 0.2010575532913208 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.6501572132110596, + "learning_rate": 2.3603026368015482e-05, + "loss": 0.323, + "step": 13257, + "teacher_loss": 0.2866288125514984 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.6359395980834961, + "learning_rate": 2.3601165704776784e-05, + "loss": 0.2745, + "step": 13258, + "teacher_loss": 0.2343650907278061 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.44736701250076294, + "learning_rate": 2.359930484433914e-05, + "loss": 0.2478, + "step": 13259, + "teacher_loss": 0.2256748080253601 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.8413408398628235, + "learning_rate": 2.359744378674521e-05, + "loss": 0.3767, + "step": 13260, + "teacher_loss": 0.3251223564147949 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.1133200079202652, + "learning_rate": 2.359558253203768e-05, + "loss": 0.1862, + "step": 13261, + "teacher_loss": 0.19428731501102448 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.3415602445602417, + "learning_rate": 2.359372108025921e-05, + "loss": 0.1611, + "step": 13262, + "teacher_loss": 0.1410546600818634 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.6447021961212158, + "learning_rate": 2.359185943145248e-05, + "loss": 0.2584, + "step": 13263, + "teacher_loss": 0.21545115113258362 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.26609092950820923, + "learning_rate": 2.3589997585660174e-05, + "loss": 0.2162, + "step": 13264, + "teacher_loss": 0.21067406237125397 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.37561583518981934, + "learning_rate": 2.358813554292498e-05, + "loss": 0.2056, + "step": 13265, + "teacher_loss": 0.18672534823417664 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.29276537895202637, + "learning_rate": 2.3586273303289584e-05, + "loss": 0.2576, + "step": 13266, + "teacher_loss": 0.2536574602127075 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.6726639866828918, + "learning_rate": 2.3584410866796687e-05, + "loss": 0.3798, + "step": 13267, + "teacher_loss": 0.3472104072570801 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.3950142562389374, + "learning_rate": 2.358254823348898e-05, + "loss": 0.2666, + "step": 13268, + "teacher_loss": 0.25232625007629395 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.44658881425857544, + "learning_rate": 2.3580685403409177e-05, + "loss": 0.261, + "step": 13269, + "teacher_loss": 0.2403843104839325 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.7590472102165222, + "learning_rate": 2.357882237659999e-05, + "loss": 0.2677, + "step": 13270, + "teacher_loss": 0.2130640745162964 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.6913022398948669, + "learning_rate": 2.3576959153104115e-05, + "loss": 0.2862, + "step": 13271, + "teacher_loss": 0.24124255776405334 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.25179898738861084, + "learning_rate": 2.357509573296429e-05, + "loss": 0.2209, + "step": 13272, + "teacher_loss": 0.21751710772514343 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.16739974915981293, + "learning_rate": 2.3573232116223224e-05, + "loss": 0.1898, + "step": 13273, + "teacher_loss": 0.19231358170509338 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.40578174591064453, + "learning_rate": 2.3571368302923656e-05, + "loss": 0.2363, + "step": 13274, + "teacher_loss": 0.21745160222053528 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.1465100646018982, + "learning_rate": 2.3569504293108304e-05, + "loss": 0.2661, + "step": 13275, + "teacher_loss": 0.27939239144325256 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.46484997868537903, + "learning_rate": 2.3567640086819915e-05, + "loss": 0.3472, + "step": 13276, + "teacher_loss": 0.33410775661468506 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.2843043804168701, + "learning_rate": 2.356577568410123e-05, + "loss": 0.2406, + "step": 13277, + "teacher_loss": 0.23579701781272888 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.3907982110977173, + "learning_rate": 2.3563911084994986e-05, + "loss": 0.3182, + "step": 13278, + "teacher_loss": 0.3101189136505127 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.416643351316452, + "learning_rate": 2.3562046289543934e-05, + "loss": 0.2802, + "step": 13279, + "teacher_loss": 0.264987587928772 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 1.1521563529968262, + "learning_rate": 2.3560181297790834e-05, + "loss": 0.3197, + "step": 13280, + "teacher_loss": 0.22725586593151093 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.30872660875320435, + "learning_rate": 2.3558316109778436e-05, + "loss": 0.2603, + "step": 13281, + "teacher_loss": 0.2549632787704468 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.4649592339992523, + "learning_rate": 2.3556450725549514e-05, + "loss": 0.202, + "step": 13282, + "teacher_loss": 0.1727590560913086 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.6302200555801392, + "learning_rate": 2.3554585145146833e-05, + "loss": 0.3153, + "step": 13283, + "teacher_loss": 0.28030288219451904 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.5839126110076904, + "learning_rate": 2.3552719368613158e-05, + "loss": 0.2834, + "step": 13284, + "teacher_loss": 0.25003737211227417 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.37018340826034546, + "learning_rate": 2.355085339599127e-05, + "loss": 0.2312, + "step": 13285, + "teacher_loss": 0.21577343344688416 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.3876428008079529, + "learning_rate": 2.3548987227323957e-05, + "loss": 0.1963, + "step": 13286, + "teacher_loss": 0.1750679612159729 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.25159209966659546, + "learning_rate": 2.3547120862653995e-05, + "loss": 0.2217, + "step": 13287, + "teacher_loss": 0.218379408121109 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.41399773955345154, + "learning_rate": 2.3545254302024177e-05, + "loss": 0.234, + "step": 13288, + "teacher_loss": 0.21398620307445526 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.2447696328163147, + "learning_rate": 2.3543387545477295e-05, + "loss": 0.1677, + "step": 13289, + "teacher_loss": 0.15912030637264252 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.24025991559028625, + "learning_rate": 2.354152059305615e-05, + "loss": 0.1992, + "step": 13290, + "teacher_loss": 0.19461937248706818 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.07693510502576828, + "learning_rate": 2.3539653444803552e-05, + "loss": 0.1713, + "step": 13291, + "teacher_loss": 0.1818252056837082 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.4016713500022888, + "learning_rate": 2.35377861007623e-05, + "loss": 0.2479, + "step": 13292, + "teacher_loss": 0.23080652952194214 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.38348233699798584, + "learning_rate": 2.353591856097521e-05, + "loss": 0.2463, + "step": 13293, + "teacher_loss": 0.23110228776931763 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.2720690071582794, + "learning_rate": 2.3534050825485102e-05, + "loss": 0.1789, + "step": 13294, + "teacher_loss": 0.16853559017181396 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.7292126417160034, + "learning_rate": 2.3532182894334793e-05, + "loss": 0.3442, + "step": 13295, + "teacher_loss": 0.30139869451522827 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.4321862757205963, + "learning_rate": 2.353031476756711e-05, + "loss": 0.2281, + "step": 13296, + "teacher_loss": 0.2054426074028015 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.6623647212982178, + "learning_rate": 2.352844644522489e-05, + "loss": 0.347, + "step": 13297, + "teacher_loss": 0.3119175434112549 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.30049002170562744, + "learning_rate": 2.3526577927350956e-05, + "loss": 0.1817, + "step": 13298, + "teacher_loss": 0.16851076483726501 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.48915815353393555, + "learning_rate": 2.352470921398816e-05, + "loss": 0.2795, + "step": 13299, + "teacher_loss": 0.2562389373779297 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.7183676362037659, + "learning_rate": 2.3522840305179334e-05, + "loss": 0.2722, + "step": 13300, + "teacher_loss": 0.2225809544324875 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.43166446685791016, + "learning_rate": 2.3520971200967337e-05, + "loss": 0.3333, + "step": 13301, + "teacher_loss": 0.3224182724952698 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.35628020763397217, + "learning_rate": 2.3519101901395016e-05, + "loss": 0.2392, + "step": 13302, + "teacher_loss": 0.22622382640838623 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.22884929180145264, + "learning_rate": 2.3517232406505233e-05, + "loss": 0.3245, + "step": 13303, + "teacher_loss": 0.33512285351753235 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.6160867214202881, + "learning_rate": 2.3515362716340844e-05, + "loss": 0.2689, + "step": 13304, + "teacher_loss": 0.2303493618965149 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.6933183670043945, + "learning_rate": 2.3513492830944718e-05, + "loss": 0.2939, + "step": 13305, + "teacher_loss": 0.24950064718723297 + }, + { + "compression_loss": 0.0, + "epoch": 2.4, + "label_loss": 0.4467252492904663, + "learning_rate": 2.3511622750359726e-05, + "loss": 0.2875, + "step": 13306, + "teacher_loss": 0.2698519229888916 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.2195822298526764, + "learning_rate": 2.3509752474628744e-05, + "loss": 0.2231, + "step": 13307, + "teacher_loss": 0.22346210479736328 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.45221322774887085, + "learning_rate": 2.3507882003794653e-05, + "loss": 0.3031, + "step": 13308, + "teacher_loss": 0.2864859104156494 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.5276502370834351, + "learning_rate": 2.3506011337900333e-05, + "loss": 0.3065, + "step": 13309, + "teacher_loss": 0.2819198668003082 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.4648154675960541, + "learning_rate": 2.3504140476988678e-05, + "loss": 0.249, + "step": 13310, + "teacher_loss": 0.2250230759382248 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.5794755220413208, + "learning_rate": 2.3502269421102578e-05, + "loss": 0.2803, + "step": 13311, + "teacher_loss": 0.247026726603508 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.4667815566062927, + "learning_rate": 2.3500398170284926e-05, + "loss": 0.2498, + "step": 13312, + "teacher_loss": 0.2256973683834076 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.4610523581504822, + "learning_rate": 2.3498526724578637e-05, + "loss": 0.3301, + "step": 13313, + "teacher_loss": 0.31560200452804565 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.7091777920722961, + "learning_rate": 2.349665508402661e-05, + "loss": 0.3493, + "step": 13314, + "teacher_loss": 0.3093149662017822 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.12012657523155212, + "learning_rate": 2.3494783248671747e-05, + "loss": 0.1756, + "step": 13315, + "teacher_loss": 0.18178194761276245 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.5337127447128296, + "learning_rate": 2.349291121855698e-05, + "loss": 0.3213, + "step": 13316, + "teacher_loss": 0.29771876335144043 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.5708508491516113, + "learning_rate": 2.349103899372522e-05, + "loss": 0.2701, + "step": 13317, + "teacher_loss": 0.2366747409105301 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.8888444900512695, + "learning_rate": 2.3489166574219396e-05, + "loss": 0.5195, + "step": 13318, + "teacher_loss": 0.47845685482025146 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.4197937548160553, + "learning_rate": 2.3487293960082435e-05, + "loss": 0.3263, + "step": 13319, + "teacher_loss": 0.3159257173538208 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.2690635025501251, + "learning_rate": 2.3485421151357264e-05, + "loss": 0.1844, + "step": 13320, + "teacher_loss": 0.17497417330741882 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.5177335739135742, + "learning_rate": 2.3483548148086832e-05, + "loss": 0.4386, + "step": 13321, + "teacher_loss": 0.42978376150131226 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.6527668237686157, + "learning_rate": 2.348167495031407e-05, + "loss": 0.3442, + "step": 13322, + "teacher_loss": 0.3098776340484619 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.408937931060791, + "learning_rate": 2.3479801558081936e-05, + "loss": 0.3662, + "step": 13323, + "teacher_loss": 0.3614322543144226 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.19425126910209656, + "learning_rate": 2.3477927971433374e-05, + "loss": 0.23, + "step": 13324, + "teacher_loss": 0.23392419517040253 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.36549636721611023, + "learning_rate": 2.3476054190411344e-05, + "loss": 0.2518, + "step": 13325, + "teacher_loss": 0.2392052263021469 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.646171510219574, + "learning_rate": 2.3474180215058804e-05, + "loss": 0.2778, + "step": 13326, + "teacher_loss": 0.23688417673110962 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.5570650100708008, + "learning_rate": 2.3472306045418715e-05, + "loss": 0.4014, + "step": 13327, + "teacher_loss": 0.3841387629508972 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.23006096482276917, + "learning_rate": 2.3470431681534052e-05, + "loss": 0.2965, + "step": 13328, + "teacher_loss": 0.3038666248321533 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.3316557705402374, + "learning_rate": 2.3468557123447784e-05, + "loss": 0.324, + "step": 13329, + "teacher_loss": 0.32318708300590515 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.2770814895629883, + "learning_rate": 2.3466682371202896e-05, + "loss": 0.2277, + "step": 13330, + "teacher_loss": 0.22219160199165344 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.5649198293685913, + "learning_rate": 2.346480742484236e-05, + "loss": 0.6044, + "step": 13331, + "teacher_loss": 0.6087542772293091 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.20918157696723938, + "learning_rate": 2.3462932284409174e-05, + "loss": 0.1894, + "step": 13332, + "teacher_loss": 0.18716877698898315 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.47164082527160645, + "learning_rate": 2.346105694994632e-05, + "loss": 0.219, + "step": 13333, + "teacher_loss": 0.19089549779891968 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.20764794945716858, + "learning_rate": 2.3459181421496797e-05, + "loss": 0.2172, + "step": 13334, + "teacher_loss": 0.218232661485672 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.26438969373703003, + "learning_rate": 2.345730569910361e-05, + "loss": 0.2291, + "step": 13335, + "teacher_loss": 0.2252292037010193 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.7862716317176819, + "learning_rate": 2.3455429782809756e-05, + "loss": 0.8569, + "step": 13336, + "teacher_loss": 0.8647250533103943 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.9031293392181396, + "learning_rate": 2.345355367265825e-05, + "loss": 0.34, + "step": 13337, + "teacher_loss": 0.2773993909358978 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.8896418809890747, + "learning_rate": 2.3451677368692098e-05, + "loss": 0.9277, + "step": 13338, + "teacher_loss": 0.9319241642951965 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.4690718650817871, + "learning_rate": 2.344980087095433e-05, + "loss": 0.3052, + "step": 13339, + "teacher_loss": 0.2869587540626526 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.5100768804550171, + "learning_rate": 2.344792417948796e-05, + "loss": 0.2902, + "step": 13340, + "teacher_loss": 0.2657606601715088 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.40496814250946045, + "learning_rate": 2.3446047294336018e-05, + "loss": 0.3124, + "step": 13341, + "teacher_loss": 0.3021667003631592 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.4826464056968689, + "learning_rate": 2.3444170215541533e-05, + "loss": 0.2932, + "step": 13342, + "teacher_loss": 0.27211669087409973 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.5637249946594238, + "learning_rate": 2.3442292943147543e-05, + "loss": 0.338, + "step": 13343, + "teacher_loss": 0.31291282176971436 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.24867957830429077, + "learning_rate": 2.3440415477197083e-05, + "loss": 0.1922, + "step": 13344, + "teacher_loss": 0.18588200211524963 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.6272789835929871, + "learning_rate": 2.3438537817733204e-05, + "loss": 0.3343, + "step": 13345, + "teacher_loss": 0.301755428314209 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.3251725435256958, + "learning_rate": 2.3436659964798953e-05, + "loss": 0.3287, + "step": 13346, + "teacher_loss": 0.3290470242500305 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.6259438395500183, + "learning_rate": 2.3434781918437384e-05, + "loss": 0.2872, + "step": 13347, + "teacher_loss": 0.24957610666751862 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.3182956576347351, + "learning_rate": 2.3432903678691555e-05, + "loss": 0.2385, + "step": 13348, + "teacher_loss": 0.22966709733009338 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.5025835037231445, + "learning_rate": 2.3431025245604526e-05, + "loss": 0.2478, + "step": 13349, + "teacher_loss": 0.21952247619628906 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.30710211396217346, + "learning_rate": 2.342914661921937e-05, + "loss": 0.2076, + "step": 13350, + "teacher_loss": 0.19656185805797577 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.6085900068283081, + "learning_rate": 2.342726779957915e-05, + "loss": 0.2964, + "step": 13351, + "teacher_loss": 0.26168394088745117 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.3433447480201721, + "learning_rate": 2.3425388786726944e-05, + "loss": 0.1928, + "step": 13352, + "teacher_loss": 0.17605873942375183 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.48761940002441406, + "learning_rate": 2.3423509580705838e-05, + "loss": 0.2631, + "step": 13353, + "teacher_loss": 0.23814114928245544 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.6748328804969788, + "learning_rate": 2.342163018155891e-05, + "loss": 0.2722, + "step": 13354, + "teacher_loss": 0.22741976380348206 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.3338778614997864, + "learning_rate": 2.341975058932925e-05, + "loss": 0.2723, + "step": 13355, + "teacher_loss": 0.2654338777065277 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.88533616065979, + "learning_rate": 2.3417870804059953e-05, + "loss": 0.2671, + "step": 13356, + "teacher_loss": 0.19835810363292694 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.9712544679641724, + "learning_rate": 2.3415990825794118e-05, + "loss": 0.2783, + "step": 13357, + "teacher_loss": 0.2012614756822586 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.46000081300735474, + "learning_rate": 2.341411065457484e-05, + "loss": 0.2631, + "step": 13358, + "teacher_loss": 0.24117402732372284 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.7524264454841614, + "learning_rate": 2.341223029044524e-05, + "loss": 0.4171, + "step": 13359, + "teacher_loss": 0.3798047602176666 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.23651687800884247, + "learning_rate": 2.341034973344842e-05, + "loss": 0.239, + "step": 13360, + "teacher_loss": 0.23926573991775513 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.26024967432022095, + "learning_rate": 2.3408468983627493e-05, + "loss": 0.2254, + "step": 13361, + "teacher_loss": 0.22150051593780518 + }, + { + "compression_loss": 0.0, + "epoch": 2.41, + "label_loss": 0.31224629282951355, + "learning_rate": 2.3406588041025584e-05, + "loss": 0.2441, + "step": 13362, + "teacher_loss": 0.23649385571479797 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.24041607975959778, + "learning_rate": 2.340470690568581e-05, + "loss": 0.2517, + "step": 13363, + "teacher_loss": 0.2529626488685608 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.699574887752533, + "learning_rate": 2.340282557765131e-05, + "loss": 0.3712, + "step": 13364, + "teacher_loss": 0.33475714921951294 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.28519248962402344, + "learning_rate": 2.3400944056965217e-05, + "loss": 0.1575, + "step": 13365, + "teacher_loss": 0.14330917596817017 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 1.6159348487854004, + "learning_rate": 2.339906234367066e-05, + "loss": 1.1932, + "step": 13366, + "teacher_loss": 1.1461822986602783 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.743181049823761, + "learning_rate": 2.339718043781078e-05, + "loss": 0.291, + "step": 13367, + "teacher_loss": 0.2407325804233551 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.6673940420150757, + "learning_rate": 2.3395298339428735e-05, + "loss": 0.3579, + "step": 13368, + "teacher_loss": 0.3235345780849457 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.5583533048629761, + "learning_rate": 2.3393416048567666e-05, + "loss": 0.3667, + "step": 13369, + "teacher_loss": 0.34545665979385376 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.37425071001052856, + "learning_rate": 2.3391533565270736e-05, + "loss": 0.2678, + "step": 13370, + "teacher_loss": 0.25599926710128784 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.3710380494594574, + "learning_rate": 2.3389650889581097e-05, + "loss": 0.2606, + "step": 13371, + "teacher_loss": 0.2483477145433426 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.49056482315063477, + "learning_rate": 2.3387768021541914e-05, + "loss": 0.2498, + "step": 13372, + "teacher_loss": 0.22305841743946075 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.25216203927993774, + "learning_rate": 2.338588496119636e-05, + "loss": 0.2029, + "step": 13373, + "teacher_loss": 0.1974402517080307 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.3505750894546509, + "learning_rate": 2.338400170858761e-05, + "loss": 0.2536, + "step": 13374, + "teacher_loss": 0.24279339611530304 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.5070775747299194, + "learning_rate": 2.3382118263758836e-05, + "loss": 0.2596, + "step": 13375, + "teacher_loss": 0.23207318782806396 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.47342830896377563, + "learning_rate": 2.338023462675322e-05, + "loss": 0.2571, + "step": 13376, + "teacher_loss": 0.2330133020877838 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.3738238215446472, + "learning_rate": 2.3378350797613948e-05, + "loss": 0.2336, + "step": 13377, + "teacher_loss": 0.21803408861160278 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.7364017963409424, + "learning_rate": 2.3376466776384212e-05, + "loss": 0.2842, + "step": 13378, + "teacher_loss": 0.23400413990020752 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.324739545583725, + "learning_rate": 2.3374582563107207e-05, + "loss": 0.2768, + "step": 13379, + "teacher_loss": 0.2714645266532898 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.4032590091228485, + "learning_rate": 2.3372698157826136e-05, + "loss": 0.2588, + "step": 13380, + "teacher_loss": 0.242717906832695 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.31986749172210693, + "learning_rate": 2.3370813560584194e-05, + "loss": 0.2594, + "step": 13381, + "teacher_loss": 0.252629816532135 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.3156262934207916, + "learning_rate": 2.3368928771424597e-05, + "loss": 0.3198, + "step": 13382, + "teacher_loss": 0.32029280066490173 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.07356128096580505, + "learning_rate": 2.3367043790390552e-05, + "loss": 0.1474, + "step": 13383, + "teacher_loss": 0.15559235215187073 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.45045191049575806, + "learning_rate": 2.336515861752528e-05, + "loss": 0.3141, + "step": 13384, + "teacher_loss": 0.2990008592605591 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.6026839017868042, + "learning_rate": 2.3363273252872003e-05, + "loss": 0.3588, + "step": 13385, + "teacher_loss": 0.33167368173599243 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.5101771354675293, + "learning_rate": 2.336138769647394e-05, + "loss": 0.2728, + "step": 13386, + "teacher_loss": 0.24644535779953003 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.8842732906341553, + "learning_rate": 2.3359501948374332e-05, + "loss": 0.3238, + "step": 13387, + "teacher_loss": 0.2615572214126587 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 1.1212352514266968, + "learning_rate": 2.3357616008616404e-05, + "loss": 0.4686, + "step": 13388, + "teacher_loss": 0.3960660696029663 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.38443708419799805, + "learning_rate": 2.3355729877243394e-05, + "loss": 0.4303, + "step": 13389, + "teacher_loss": 0.4354340732097626 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.41377824544906616, + "learning_rate": 2.3353843554298555e-05, + "loss": 0.2278, + "step": 13390, + "teacher_loss": 0.2071402668952942 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.31958824396133423, + "learning_rate": 2.335195703982513e-05, + "loss": 0.2569, + "step": 13391, + "teacher_loss": 0.24990540742874146 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.8397213220596313, + "learning_rate": 2.3350070333866367e-05, + "loss": 0.3171, + "step": 13392, + "teacher_loss": 0.2589814066886902 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.598009467124939, + "learning_rate": 2.334818343646553e-05, + "loss": 0.7791, + "step": 13393, + "teacher_loss": 0.7992147207260132 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.6784560680389404, + "learning_rate": 2.3346296347665872e-05, + "loss": 0.2982, + "step": 13394, + "teacher_loss": 0.25590789318084717 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.48372238874435425, + "learning_rate": 2.3344409067510665e-05, + "loss": 0.2908, + "step": 13395, + "teacher_loss": 0.269344687461853 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.3220786452293396, + "learning_rate": 2.3342521596043176e-05, + "loss": 0.3307, + "step": 13396, + "teacher_loss": 0.3316769599914551 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.30690819025039673, + "learning_rate": 2.3340633933306677e-05, + "loss": 0.1833, + "step": 13397, + "teacher_loss": 0.16959123313426971 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.6358180046081543, + "learning_rate": 2.333874607934445e-05, + "loss": 0.2628, + "step": 13398, + "teacher_loss": 0.22134973108768463 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.1271497905254364, + "learning_rate": 2.3336858034199774e-05, + "loss": 0.1772, + "step": 13399, + "teacher_loss": 0.18276116251945496 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.5849890112876892, + "learning_rate": 2.333496979791594e-05, + "loss": 0.2801, + "step": 13400, + "teacher_loss": 0.24623671174049377 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.43503814935684204, + "learning_rate": 2.3333081370536236e-05, + "loss": 0.2974, + "step": 13401, + "teacher_loss": 0.28212088346481323 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.454201877117157, + "learning_rate": 2.333119275210397e-05, + "loss": 0.3415, + "step": 13402, + "teacher_loss": 0.3290107250213623 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.28829506039619446, + "learning_rate": 2.332930394266242e-05, + "loss": 0.2138, + "step": 13403, + "teacher_loss": 0.2055424600839615 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.40818512439727783, + "learning_rate": 2.332741494225491e-05, + "loss": 0.3488, + "step": 13404, + "teacher_loss": 0.3421688675880432 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.4157203435897827, + "learning_rate": 2.3325525750924737e-05, + "loss": 0.3272, + "step": 13405, + "teacher_loss": 0.31736963987350464 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.39925360679626465, + "learning_rate": 2.3323636368715227e-05, + "loss": 0.2416, + "step": 13406, + "teacher_loss": 0.22404126822948456 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.32423126697540283, + "learning_rate": 2.332174679566968e-05, + "loss": 0.2564, + "step": 13407, + "teacher_loss": 0.24884669482707977 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.4168992042541504, + "learning_rate": 2.331985703183144e-05, + "loss": 0.1689, + "step": 13408, + "teacher_loss": 0.14130720496177673 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.3969237208366394, + "learning_rate": 2.3317967077243817e-05, + "loss": 0.2681, + "step": 13409, + "teacher_loss": 0.25382718443870544 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.6571687459945679, + "learning_rate": 2.3316076931950148e-05, + "loss": 0.5307, + "step": 13410, + "teacher_loss": 0.5166052579879761 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.4727189242839813, + "learning_rate": 2.3314186595993767e-05, + "loss": 0.3607, + "step": 13411, + "teacher_loss": 0.3482898473739624 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.2935327887535095, + "learning_rate": 2.3312296069418015e-05, + "loss": 0.1832, + "step": 13412, + "teacher_loss": 0.17095747590065002 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.6671868562698364, + "learning_rate": 2.3310405352266237e-05, + "loss": 0.3581, + "step": 13413, + "teacher_loss": 0.32378774881362915 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.18998435139656067, + "learning_rate": 2.3308514444581784e-05, + "loss": 0.191, + "step": 13414, + "teacher_loss": 0.1911236196756363 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.41326427459716797, + "learning_rate": 2.3306623346407996e-05, + "loss": 0.2871, + "step": 13415, + "teacher_loss": 0.2730720341205597 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.2343122661113739, + "learning_rate": 2.330473205778825e-05, + "loss": 0.3045, + "step": 13416, + "teacher_loss": 0.312247633934021 + }, + { + "compression_loss": 0.0, + "epoch": 2.42, + "label_loss": 0.5434048771858215, + "learning_rate": 2.3302840578765886e-05, + "loss": 0.2699, + "step": 13417, + "teacher_loss": 0.23954391479492188 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.3696182370185852, + "learning_rate": 2.3300948909384283e-05, + "loss": 0.2059, + "step": 13418, + "teacher_loss": 0.18775790929794312 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.29339170455932617, + "learning_rate": 2.3299057049686815e-05, + "loss": 0.2122, + "step": 13419, + "teacher_loss": 0.20322604477405548 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.27169322967529297, + "learning_rate": 2.3297164999716846e-05, + "loss": 0.2363, + "step": 13420, + "teacher_loss": 0.23241224884986877 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.42531514167785645, + "learning_rate": 2.3295272759517758e-05, + "loss": 0.3378, + "step": 13421, + "teacher_loss": 0.3281002640724182 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.27721619606018066, + "learning_rate": 2.329338032913294e-05, + "loss": 0.2178, + "step": 13422, + "teacher_loss": 0.2112330049276352 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.30392444133758545, + "learning_rate": 2.3291487708605775e-05, + "loss": 0.2082, + "step": 13423, + "teacher_loss": 0.19759348034858704 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.2887619137763977, + "learning_rate": 2.3289594897979656e-05, + "loss": 0.2316, + "step": 13424, + "teacher_loss": 0.22525274753570557 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.557485818862915, + "learning_rate": 2.3287701897297983e-05, + "loss": 0.399, + "step": 13425, + "teacher_loss": 0.3814348876476288 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.5290688276290894, + "learning_rate": 2.3285808706604146e-05, + "loss": 0.2347, + "step": 13426, + "teacher_loss": 0.2020326852798462 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.5916503667831421, + "learning_rate": 2.328391532594156e-05, + "loss": 0.2481, + "step": 13427, + "teacher_loss": 0.20994271337985992 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.3555203080177307, + "learning_rate": 2.3282021755353637e-05, + "loss": 0.1747, + "step": 13428, + "teacher_loss": 0.1546405851840973 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.2876202166080475, + "learning_rate": 2.328012799488378e-05, + "loss": 0.2505, + "step": 13429, + "teacher_loss": 0.24635246396064758 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.5434819459915161, + "learning_rate": 2.3278234044575414e-05, + "loss": 0.3619, + "step": 13430, + "teacher_loss": 0.3416920304298401 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.336340069770813, + "learning_rate": 2.3276339904471965e-05, + "loss": 0.2256, + "step": 13431, + "teacher_loss": 0.21328291296958923 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.20888549089431763, + "learning_rate": 2.3274445574616853e-05, + "loss": 0.1931, + "step": 13432, + "teacher_loss": 0.1913781315088272 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.4624791145324707, + "learning_rate": 2.327255105505351e-05, + "loss": 0.3213, + "step": 13433, + "teacher_loss": 0.30563169717788696 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.37911367416381836, + "learning_rate": 2.327065634582538e-05, + "loss": 0.2878, + "step": 13434, + "teacher_loss": 0.27770939469337463 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.4340216815471649, + "learning_rate": 2.3268761446975888e-05, + "loss": 0.3846, + "step": 13435, + "teacher_loss": 0.37915581464767456 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.5789175033569336, + "learning_rate": 2.326686635854849e-05, + "loss": 0.2755, + "step": 13436, + "teacher_loss": 0.2418239712715149 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.6702301502227783, + "learning_rate": 2.3264971080586636e-05, + "loss": 0.3156, + "step": 13437, + "teacher_loss": 0.27623870968818665 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.13419650495052338, + "learning_rate": 2.326307561313377e-05, + "loss": 0.1516, + "step": 13438, + "teacher_loss": 0.15357431769371033 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.476350873708725, + "learning_rate": 2.326117995623336e-05, + "loss": 0.261, + "step": 13439, + "teacher_loss": 0.2370595633983612 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.4550631046295166, + "learning_rate": 2.3259284109928853e-05, + "loss": 0.2846, + "step": 13440, + "teacher_loss": 0.2656833231449127 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.6323797702789307, + "learning_rate": 2.325738807426373e-05, + "loss": 0.3645, + "step": 13441, + "teacher_loss": 0.33470118045806885 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.726628303527832, + "learning_rate": 2.3255491849281454e-05, + "loss": 0.4423, + "step": 13442, + "teacher_loss": 0.41070467233657837 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.5524322390556335, + "learning_rate": 2.3253595435025503e-05, + "loss": 0.3422, + "step": 13443, + "teacher_loss": 0.3188698887825012 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.28182151913642883, + "learning_rate": 2.3251698831539353e-05, + "loss": 0.2372, + "step": 13444, + "teacher_loss": 0.23220627009868622 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.5198459029197693, + "learning_rate": 2.3249802038866487e-05, + "loss": 0.2353, + "step": 13445, + "teacher_loss": 0.20369216799736023 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 1.5448665618896484, + "learning_rate": 2.3247905057050395e-05, + "loss": 0.4856, + "step": 13446, + "teacher_loss": 0.36793047189712524 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.5161502361297607, + "learning_rate": 2.324600788613457e-05, + "loss": 0.354, + "step": 13447, + "teacher_loss": 0.33603590726852417 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.462638795375824, + "learning_rate": 2.324411052616251e-05, + "loss": 0.3547, + "step": 13448, + "teacher_loss": 0.34273117780685425 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 1.106665015220642, + "learning_rate": 2.3242212977177705e-05, + "loss": 0.3577, + "step": 13449, + "teacher_loss": 0.27451103925704956 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.25974568724632263, + "learning_rate": 2.3240315239223676e-05, + "loss": 0.321, + "step": 13450, + "teacher_loss": 0.3278290033340454 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.4112575054168701, + "learning_rate": 2.3238417312343922e-05, + "loss": 0.2067, + "step": 13451, + "teacher_loss": 0.18399155139923096 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.34670233726501465, + "learning_rate": 2.3236519196581953e-05, + "loss": 0.2298, + "step": 13452, + "teacher_loss": 0.21681416034698486 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.4199913740158081, + "learning_rate": 2.3234620891981297e-05, + "loss": 0.3213, + "step": 13453, + "teacher_loss": 0.31032997369766235 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.44018006324768066, + "learning_rate": 2.3232722398585474e-05, + "loss": 0.2141, + "step": 13454, + "teacher_loss": 0.18896540999412537 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.28467023372650146, + "learning_rate": 2.323082371643801e-05, + "loss": 0.2231, + "step": 13455, + "teacher_loss": 0.21629387140274048 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.38557907938957214, + "learning_rate": 2.3228924845582433e-05, + "loss": 0.2445, + "step": 13456, + "teacher_loss": 0.22882162034511566 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.2279781699180603, + "learning_rate": 2.322702578606228e-05, + "loss": 0.1784, + "step": 13457, + "teacher_loss": 0.17284482717514038 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.6300699710845947, + "learning_rate": 2.3225126537921094e-05, + "loss": 0.3979, + "step": 13458, + "teacher_loss": 0.3721427321434021 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.23973649740219116, + "learning_rate": 2.3223227101202418e-05, + "loss": 0.2273, + "step": 13459, + "teacher_loss": 0.2258821725845337 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.4223008453845978, + "learning_rate": 2.32213274759498e-05, + "loss": 0.267, + "step": 13460, + "teacher_loss": 0.24971584975719452 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.42848461866378784, + "learning_rate": 2.3219427662206788e-05, + "loss": 0.1849, + "step": 13461, + "teacher_loss": 0.1578855812549591 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.3234953284263611, + "learning_rate": 2.3217527660016942e-05, + "loss": 0.2099, + "step": 13462, + "teacher_loss": 0.19728602468967438 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.3779809772968292, + "learning_rate": 2.321562746942383e-05, + "loss": 0.1923, + "step": 13463, + "teacher_loss": 0.17169684171676636 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.42064130306243896, + "learning_rate": 2.3213727090471008e-05, + "loss": 0.2904, + "step": 13464, + "teacher_loss": 0.27595970034599304 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.8038947582244873, + "learning_rate": 2.3211826523202048e-05, + "loss": 0.2557, + "step": 13465, + "teacher_loss": 0.19479334354400635 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.659406304359436, + "learning_rate": 2.3209925767660527e-05, + "loss": 0.3999, + "step": 13466, + "teacher_loss": 0.37101155519485474 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.6156027317047119, + "learning_rate": 2.3208024823890026e-05, + "loss": 0.2063, + "step": 13467, + "teacher_loss": 0.16078908741474152 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.48168832063674927, + "learning_rate": 2.3206123691934117e-05, + "loss": 0.1995, + "step": 13468, + "teacher_loss": 0.16809991002082825 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.23847299814224243, + "learning_rate": 2.320422237183641e-05, + "loss": 0.2532, + "step": 13469, + "teacher_loss": 0.25480973720550537 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.3450428545475006, + "learning_rate": 2.3202320863640466e-05, + "loss": 0.3011, + "step": 13470, + "teacher_loss": 0.2962424159049988 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.4589805603027344, + "learning_rate": 2.32004191673899e-05, + "loss": 0.2563, + "step": 13471, + "teacher_loss": 0.2337377965450287 + }, + { + "compression_loss": 0.0, + "epoch": 2.43, + "label_loss": 0.5096904039382935, + "learning_rate": 2.3198517283128316e-05, + "loss": 0.269, + "step": 13472, + "teacher_loss": 0.24228110909461975 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.4801781177520752, + "learning_rate": 2.31966152108993e-05, + "loss": 0.2615, + "step": 13473, + "teacher_loss": 0.23724089562892914 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.17093929648399353, + "learning_rate": 2.3194712950746483e-05, + "loss": 0.2058, + "step": 13474, + "teacher_loss": 0.20963357388973236 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.7945429086685181, + "learning_rate": 2.319281050271346e-05, + "loss": 0.6282, + "step": 13475, + "teacher_loss": 0.6097357273101807 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.34345659613609314, + "learning_rate": 2.319090786684386e-05, + "loss": 0.206, + "step": 13476, + "teacher_loss": 0.1907261312007904 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.22912493348121643, + "learning_rate": 2.3189005043181297e-05, + "loss": 0.1675, + "step": 13477, + "teacher_loss": 0.16064202785491943 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.5353869795799255, + "learning_rate": 2.3187102031769403e-05, + "loss": 0.2536, + "step": 13478, + "teacher_loss": 0.2222995162010193 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.4261696934700012, + "learning_rate": 2.3185198832651802e-05, + "loss": 0.2257, + "step": 13479, + "teacher_loss": 0.20341119170188904 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.6609184741973877, + "learning_rate": 2.3183295445872135e-05, + "loss": 0.2756, + "step": 13480, + "teacher_loss": 0.2327386736869812 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.46647441387176514, + "learning_rate": 2.3181391871474036e-05, + "loss": 0.3196, + "step": 13481, + "teacher_loss": 0.30325138568878174 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.5660653114318848, + "learning_rate": 2.317948810950115e-05, + "loss": 0.2939, + "step": 13482, + "teacher_loss": 0.2636072039604187 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.2720910906791687, + "learning_rate": 2.317758415999713e-05, + "loss": 0.1981, + "step": 13483, + "teacher_loss": 0.1898963451385498 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.3639522194862366, + "learning_rate": 2.317568002300562e-05, + "loss": 0.2676, + "step": 13484, + "teacher_loss": 0.2568461298942566 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.3364434838294983, + "learning_rate": 2.3173775698570277e-05, + "loss": 0.2153, + "step": 13485, + "teacher_loss": 0.20181438326835632 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.41619861125946045, + "learning_rate": 2.3171871186734764e-05, + "loss": 0.3277, + "step": 13486, + "teacher_loss": 0.317893922328949 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.8406264781951904, + "learning_rate": 2.3169966487542746e-05, + "loss": 0.2715, + "step": 13487, + "teacher_loss": 0.20826585590839386 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.2673688232898712, + "learning_rate": 2.31680616010379e-05, + "loss": 0.2173, + "step": 13488, + "teacher_loss": 0.21178071200847626 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.41777849197387695, + "learning_rate": 2.3166156527263876e-05, + "loss": 0.2386, + "step": 13489, + "teacher_loss": 0.21866193413734436 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.2996369004249573, + "learning_rate": 2.3164251266264374e-05, + "loss": 0.255, + "step": 13490, + "teacher_loss": 0.2500323951244354 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.4252222776412964, + "learning_rate": 2.3162345818083065e-05, + "loss": 0.2196, + "step": 13491, + "teacher_loss": 0.19680823385715485 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.5988937616348267, + "learning_rate": 2.316044018276364e-05, + "loss": 0.3332, + "step": 13492, + "teacher_loss": 0.30370596051216125 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.3215354382991791, + "learning_rate": 2.315853436034979e-05, + "loss": 0.2342, + "step": 13493, + "teacher_loss": 0.22448039054870605 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.4576791524887085, + "learning_rate": 2.3156628350885205e-05, + "loss": 0.3783, + "step": 13494, + "teacher_loss": 0.3694588541984558 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.3960202932357788, + "learning_rate": 2.3154722154413585e-05, + "loss": 0.2449, + "step": 13495, + "teacher_loss": 0.22812967002391815 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.565636932849884, + "learning_rate": 2.315281577097864e-05, + "loss": 0.279, + "step": 13496, + "teacher_loss": 0.24714219570159912 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.4581831097602844, + "learning_rate": 2.315090920062407e-05, + "loss": 0.2982, + "step": 13497, + "teacher_loss": 0.2804328203201294 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.4776240587234497, + "learning_rate": 2.314900244339359e-05, + "loss": 0.301, + "step": 13498, + "teacher_loss": 0.2813946008682251 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.7765868306159973, + "learning_rate": 2.314709549933092e-05, + "loss": 0.2592, + "step": 13499, + "teacher_loss": 0.20169463753700256 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.5115863680839539, + "learning_rate": 2.314518836847977e-05, + "loss": 0.4981, + "step": 13500, + "teacher_loss": 0.49662870168685913 + }, + { + "epoch": 2.44, + "eval_exact_match": 79.82024597918638, + "eval_f1": 87.37283128266982, + "step": 13500 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.7831138372421265, + "learning_rate": 2.3143281050883872e-05, + "loss": 0.3632, + "step": 13501, + "teacher_loss": 0.31658798456192017 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.8992929458618164, + "learning_rate": 2.314137354658696e-05, + "loss": 0.2628, + "step": 13502, + "teacher_loss": 0.19202309846878052 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.31505823135375977, + "learning_rate": 2.3139465855632753e-05, + "loss": 0.2832, + "step": 13503, + "teacher_loss": 0.27961137890815735 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.8971832990646362, + "learning_rate": 2.3137557978065003e-05, + "loss": 0.3337, + "step": 13504, + "teacher_loss": 0.27111852169036865 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.6072332859039307, + "learning_rate": 2.3135649913927445e-05, + "loss": 0.3208, + "step": 13505, + "teacher_loss": 0.2889575660228729 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.3238345682621002, + "learning_rate": 2.3133741663263827e-05, + "loss": 0.2501, + "step": 13506, + "teacher_loss": 0.24186615645885468 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.38973528146743774, + "learning_rate": 2.31318332261179e-05, + "loss": 0.2776, + "step": 13507, + "teacher_loss": 0.2650982141494751 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.5254389643669128, + "learning_rate": 2.3129924602533413e-05, + "loss": 0.2597, + "step": 13508, + "teacher_loss": 0.23022761940956116 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.4465681314468384, + "learning_rate": 2.3128015792554137e-05, + "loss": 0.2886, + "step": 13509, + "teacher_loss": 0.2710148096084595 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.49445420503616333, + "learning_rate": 2.3126106796223823e-05, + "loss": 0.227, + "step": 13510, + "teacher_loss": 0.19727496802806854 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.6732682585716248, + "learning_rate": 2.312419761358624e-05, + "loss": 0.6361, + "step": 13511, + "teacher_loss": 0.6319177150726318 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.62847900390625, + "learning_rate": 2.312228824468517e-05, + "loss": 0.2597, + "step": 13512, + "teacher_loss": 0.2186773121356964 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.2787730395793915, + "learning_rate": 2.312037868956438e-05, + "loss": 0.2234, + "step": 13513, + "teacher_loss": 0.21728336811065674 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.6420366168022156, + "learning_rate": 2.3118468948267653e-05, + "loss": 0.2894, + "step": 13514, + "teacher_loss": 0.2502191960811615 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.15983831882476807, + "learning_rate": 2.3116559020838772e-05, + "loss": 0.1696, + "step": 13515, + "teacher_loss": 0.17067524790763855 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.2552987337112427, + "learning_rate": 2.311464890732153e-05, + "loss": 0.2877, + "step": 13516, + "teacher_loss": 0.29128482937812805 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.20469996333122253, + "learning_rate": 2.3112738607759713e-05, + "loss": 0.2357, + "step": 13517, + "teacher_loss": 0.2391444742679596 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.5051414966583252, + "learning_rate": 2.311082812219713e-05, + "loss": 0.2403, + "step": 13518, + "teacher_loss": 0.2108653485774994 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.8079785108566284, + "learning_rate": 2.310891745067757e-05, + "loss": 0.7205, + "step": 13519, + "teacher_loss": 0.710726261138916 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.6751030683517456, + "learning_rate": 2.3107006593244848e-05, + "loss": 0.3371, + "step": 13520, + "teacher_loss": 0.2995164394378662 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.17315822839736938, + "learning_rate": 2.3105095549942768e-05, + "loss": 0.23, + "step": 13521, + "teacher_loss": 0.23635897040367126 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.7756048440933228, + "learning_rate": 2.310318432081515e-05, + "loss": 0.5477, + "step": 13522, + "teacher_loss": 0.5224227905273438 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.8406456112861633, + "learning_rate": 2.3101272905905808e-05, + "loss": 0.2652, + "step": 13523, + "teacher_loss": 0.2012418508529663 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 1.1960383653640747, + "learning_rate": 2.309936130525857e-05, + "loss": 0.4351, + "step": 13524, + "teacher_loss": 0.3505184054374695 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.8032013177871704, + "learning_rate": 2.3097449518917257e-05, + "loss": 0.4553, + "step": 13525, + "teacher_loss": 0.4166238307952881 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.3685430884361267, + "learning_rate": 2.3095537546925705e-05, + "loss": 0.2547, + "step": 13526, + "teacher_loss": 0.24200661480426788 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.35262101888656616, + "learning_rate": 2.309362538932775e-05, + "loss": 0.299, + "step": 13527, + "teacher_loss": 0.29305779933929443 + }, + { + "compression_loss": 0.0, + "epoch": 2.44, + "label_loss": 0.7022177577018738, + "learning_rate": 2.309171304616723e-05, + "loss": 0.2438, + "step": 13528, + "teacher_loss": 0.19287629425525665 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.3267899751663208, + "learning_rate": 2.308980051748799e-05, + "loss": 0.225, + "step": 13529, + "teacher_loss": 0.21364432573318481 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.6883553266525269, + "learning_rate": 2.3087887803333878e-05, + "loss": 0.302, + "step": 13530, + "teacher_loss": 0.2590651214122772 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.5634912252426147, + "learning_rate": 2.3085974903748746e-05, + "loss": 0.3152, + "step": 13531, + "teacher_loss": 0.2875780463218689 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.8322135210037231, + "learning_rate": 2.3084061818776458e-05, + "loss": 0.3014, + "step": 13532, + "teacher_loss": 0.2423807978630066 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.5890605449676514, + "learning_rate": 2.3082148548460862e-05, + "loss": 0.2577, + "step": 13533, + "teacher_loss": 0.22084945440292358 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.27247971296310425, + "learning_rate": 2.3080235092845838e-05, + "loss": 0.3106, + "step": 13534, + "teacher_loss": 0.31482094526290894 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.27078306674957275, + "learning_rate": 2.307832145197525e-05, + "loss": 0.1948, + "step": 13535, + "teacher_loss": 0.18635180592536926 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.53783118724823, + "learning_rate": 2.3076407625892964e-05, + "loss": 0.2465, + "step": 13536, + "teacher_loss": 0.21417482197284698 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.2757284939289093, + "learning_rate": 2.307449361464287e-05, + "loss": 0.1609, + "step": 13537, + "teacher_loss": 0.14813555777072906 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.5906493067741394, + "learning_rate": 2.3072579418268843e-05, + "loss": 0.2749, + "step": 13538, + "teacher_loss": 0.23986464738845825 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 1.0673515796661377, + "learning_rate": 2.3070665036814776e-05, + "loss": 0.5467, + "step": 13539, + "teacher_loss": 0.4888056218624115 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.24932169914245605, + "learning_rate": 2.3068750470324556e-05, + "loss": 0.1885, + "step": 13540, + "teacher_loss": 0.18175092339515686 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.7235797047615051, + "learning_rate": 2.306683571884208e-05, + "loss": 0.2743, + "step": 13541, + "teacher_loss": 0.2243853211402893 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.452308714389801, + "learning_rate": 2.3064920782411243e-05, + "loss": 0.2124, + "step": 13542, + "teacher_loss": 0.18578092753887177 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.07145362347364426, + "learning_rate": 2.3063005661075957e-05, + "loss": 0.1981, + "step": 13543, + "teacher_loss": 0.21221689879894257 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.5338555574417114, + "learning_rate": 2.3061090354880125e-05, + "loss": 0.2268, + "step": 13544, + "teacher_loss": 0.19263795018196106 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.11490755528211594, + "learning_rate": 2.3059174863867656e-05, + "loss": 0.1627, + "step": 13545, + "teacher_loss": 0.16803517937660217 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.623649001121521, + "learning_rate": 2.3057259188082475e-05, + "loss": 0.2679, + "step": 13546, + "teacher_loss": 0.2283647358417511 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.5715545415878296, + "learning_rate": 2.305534332756849e-05, + "loss": 0.3059, + "step": 13547, + "teacher_loss": 0.27636194229125977 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.4184514582157135, + "learning_rate": 2.305342728236964e-05, + "loss": 0.2936, + "step": 13548, + "teacher_loss": 0.27975472807884216 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.4545406699180603, + "learning_rate": 2.3051511052529847e-05, + "loss": 0.2879, + "step": 13549, + "teacher_loss": 0.2693905830383301 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.7193959951400757, + "learning_rate": 2.3049594638093044e-05, + "loss": 0.3071, + "step": 13550, + "teacher_loss": 0.2613123953342438 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.40149033069610596, + "learning_rate": 2.304767803910317e-05, + "loss": 0.1873, + "step": 13551, + "teacher_loss": 0.16345813870429993 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.8751025199890137, + "learning_rate": 2.3045761255604168e-05, + "loss": 0.4507, + "step": 13552, + "teacher_loss": 0.40358966588974 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.45521363615989685, + "learning_rate": 2.304384428763998e-05, + "loss": 0.3242, + "step": 13553, + "teacher_loss": 0.3096349835395813 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.39776796102523804, + "learning_rate": 2.3041927135254564e-05, + "loss": 0.3577, + "step": 13554, + "teacher_loss": 0.3532451391220093 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.2618064284324646, + "learning_rate": 2.304000979849186e-05, + "loss": 0.1873, + "step": 13555, + "teacher_loss": 0.17898762226104736 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.8584522008895874, + "learning_rate": 2.3038092277395847e-05, + "loss": 0.3402, + "step": 13556, + "teacher_loss": 0.2826330065727234 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.5318832397460938, + "learning_rate": 2.3036174572010474e-05, + "loss": 0.2413, + "step": 13557, + "teacher_loss": 0.20902976393699646 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.7420346736907959, + "learning_rate": 2.303425668237971e-05, + "loss": 0.3905, + "step": 13558, + "teacher_loss": 0.35139989852905273 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.20058172941207886, + "learning_rate": 2.303233860854753e-05, + "loss": 0.1864, + "step": 13559, + "teacher_loss": 0.18485969305038452 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.301930695772171, + "learning_rate": 2.303042035055791e-05, + "loss": 0.2167, + "step": 13560, + "teacher_loss": 0.20727437734603882 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.4468812942504883, + "learning_rate": 2.3028501908454827e-05, + "loss": 0.238, + "step": 13561, + "teacher_loss": 0.21475017070770264 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.9812082648277283, + "learning_rate": 2.3026583282282262e-05, + "loss": 0.311, + "step": 13562, + "teacher_loss": 0.2364833950996399 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.5296786427497864, + "learning_rate": 2.302466447208421e-05, + "loss": 0.2354, + "step": 13563, + "teacher_loss": 0.2026805579662323 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.4274221956729889, + "learning_rate": 2.302274547790466e-05, + "loss": 0.2771, + "step": 13564, + "teacher_loss": 0.26036199927330017 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.4614905118942261, + "learning_rate": 2.302082629978761e-05, + "loss": 0.3495, + "step": 13565, + "teacher_loss": 0.33708882331848145 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.16040746867656708, + "learning_rate": 2.301890693777706e-05, + "loss": 0.1789, + "step": 13566, + "teacher_loss": 0.18090671300888062 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.1445402354001999, + "learning_rate": 2.3016987391917016e-05, + "loss": 0.1956, + "step": 13567, + "teacher_loss": 0.20126408338546753 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.574134349822998, + "learning_rate": 2.301506766225149e-05, + "loss": 0.5372, + "step": 13568, + "teacher_loss": 0.5330567359924316 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.6888896822929382, + "learning_rate": 2.301314774882449e-05, + "loss": 0.3609, + "step": 13569, + "teacher_loss": 0.32447364926338196 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.5721328854560852, + "learning_rate": 2.301122765168004e-05, + "loss": 0.2373, + "step": 13570, + "teacher_loss": 0.20014673471450806 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.33922451734542847, + "learning_rate": 2.3009307370862154e-05, + "loss": 0.3426, + "step": 13571, + "teacher_loss": 0.34297117590904236 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.3162120282649994, + "learning_rate": 2.3007386906414864e-05, + "loss": 0.2089, + "step": 13572, + "teacher_loss": 0.1970038115978241 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.49213704466819763, + "learning_rate": 2.3005466258382202e-05, + "loss": 0.2659, + "step": 13573, + "teacher_loss": 0.2407526820898056 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.3871278166770935, + "learning_rate": 2.3003545426808204e-05, + "loss": 0.2548, + "step": 13574, + "teacher_loss": 0.24011310935020447 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 1.0373250246047974, + "learning_rate": 2.3001624411736894e-05, + "loss": 0.3503, + "step": 13575, + "teacher_loss": 0.27401435375213623 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.436976820230484, + "learning_rate": 2.299970321321234e-05, + "loss": 0.2298, + "step": 13576, + "teacher_loss": 0.20673570036888123 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.39382803440093994, + "learning_rate": 2.2997781831278565e-05, + "loss": 0.2198, + "step": 13577, + "teacher_loss": 0.20043224096298218 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.7106810808181763, + "learning_rate": 2.2995860265979636e-05, + "loss": 0.2714, + "step": 13578, + "teacher_loss": 0.2225722074508667 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.39781245589256287, + "learning_rate": 2.2993938517359604e-05, + "loss": 0.2693, + "step": 13579, + "teacher_loss": 0.25506889820098877 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.312117338180542, + "learning_rate": 2.2992016585462522e-05, + "loss": 0.2289, + "step": 13580, + "teacher_loss": 0.21964344382286072 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.24402910470962524, + "learning_rate": 2.299009447033247e-05, + "loss": 0.2791, + "step": 13581, + "teacher_loss": 0.28297752141952515 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.3927075266838074, + "learning_rate": 2.2988172172013507e-05, + "loss": 0.2684, + "step": 13582, + "teacher_loss": 0.2546292543411255 + }, + { + "compression_loss": 0.0, + "epoch": 2.45, + "label_loss": 0.5710468292236328, + "learning_rate": 2.29862496905497e-05, + "loss": 0.2603, + "step": 13583, + "teacher_loss": 0.22572194039821625 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.43110892176628113, + "learning_rate": 2.2984327025985138e-05, + "loss": 0.2206, + "step": 13584, + "teacher_loss": 0.1971992701292038 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.5896751880645752, + "learning_rate": 2.298240417836389e-05, + "loss": 0.3364, + "step": 13585, + "teacher_loss": 0.3082408308982849 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.5967991352081299, + "learning_rate": 2.298048114773005e-05, + "loss": 0.3002, + "step": 13586, + "teacher_loss": 0.26725292205810547 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.24697265028953552, + "learning_rate": 2.2978557934127704e-05, + "loss": 0.2375, + "step": 13587, + "teacher_loss": 0.2364168018102646 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.41696450114250183, + "learning_rate": 2.297663453760094e-05, + "loss": 0.2509, + "step": 13588, + "teacher_loss": 0.23244819045066833 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 1.0541925430297852, + "learning_rate": 2.297471095819387e-05, + "loss": 0.4058, + "step": 13589, + "teacher_loss": 0.33377522230148315 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.24102690815925598, + "learning_rate": 2.297278719595058e-05, + "loss": 0.1738, + "step": 13590, + "teacher_loss": 0.1663818061351776 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.6702922582626343, + "learning_rate": 2.2970863250915187e-05, + "loss": 0.3997, + "step": 13591, + "teacher_loss": 0.3696141242980957 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.607141375541687, + "learning_rate": 2.29689391231318e-05, + "loss": 0.2109, + "step": 13592, + "teacher_loss": 0.16683481633663177 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.3341156840324402, + "learning_rate": 2.2967014812644525e-05, + "loss": 0.2032, + "step": 13593, + "teacher_loss": 0.18869765102863312 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.6203186511993408, + "learning_rate": 2.296509031949749e-05, + "loss": 0.2597, + "step": 13594, + "teacher_loss": 0.21967898309230804 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.33753299713134766, + "learning_rate": 2.2963165643734813e-05, + "loss": 0.2123, + "step": 13595, + "teacher_loss": 0.1983848512172699 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.26318567991256714, + "learning_rate": 2.2961240785400623e-05, + "loss": 0.1827, + "step": 13596, + "teacher_loss": 0.17380431294441223 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.3265713155269623, + "learning_rate": 2.295931574453905e-05, + "loss": 0.2955, + "step": 13597, + "teacher_loss": 0.2920163571834564 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.38764330744743347, + "learning_rate": 2.2957390521194234e-05, + "loss": 0.2875, + "step": 13598, + "teacher_loss": 0.2763361632823944 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.5720712542533875, + "learning_rate": 2.295546511541031e-05, + "loss": 0.335, + "step": 13599, + "teacher_loss": 0.3086398243904114 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.8484443426132202, + "learning_rate": 2.2953539527231423e-05, + "loss": 0.3601, + "step": 13600, + "teacher_loss": 0.30589479207992554 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.3951161205768585, + "learning_rate": 2.2951613756701716e-05, + "loss": 0.3482, + "step": 13601, + "teacher_loss": 0.34295085072517395 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.486197829246521, + "learning_rate": 2.2949687803865346e-05, + "loss": 0.3401, + "step": 13602, + "teacher_loss": 0.3238406777381897 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.4647293984889984, + "learning_rate": 2.2947761668766472e-05, + "loss": 0.2806, + "step": 13603, + "teacher_loss": 0.26011455059051514 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.4722694158554077, + "learning_rate": 2.2945835351449252e-05, + "loss": 0.3578, + "step": 13604, + "teacher_loss": 0.34504833817481995 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.18278971314430237, + "learning_rate": 2.294390885195785e-05, + "loss": 0.1776, + "step": 13605, + "teacher_loss": 0.1770172268152237 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.2531588673591614, + "learning_rate": 2.2941982170336434e-05, + "loss": 0.2413, + "step": 13606, + "teacher_loss": 0.23996543884277344 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.6120121479034424, + "learning_rate": 2.2940055306629174e-05, + "loss": 0.2723, + "step": 13607, + "teacher_loss": 0.23450899124145508 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.49934884905815125, + "learning_rate": 2.2938128260880256e-05, + "loss": 0.242, + "step": 13608, + "teacher_loss": 0.21343611180782318 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.5729543566703796, + "learning_rate": 2.293620103313386e-05, + "loss": 0.3002, + "step": 13609, + "teacher_loss": 0.2698464095592499 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.44234800338745117, + "learning_rate": 2.2934273623434163e-05, + "loss": 0.261, + "step": 13610, + "teacher_loss": 0.24080948531627655 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.30538347363471985, + "learning_rate": 2.2932346031825362e-05, + "loss": 0.4011, + "step": 13611, + "teacher_loss": 0.4117584824562073 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.4321520924568176, + "learning_rate": 2.2930418258351648e-05, + "loss": 0.263, + "step": 13612, + "teacher_loss": 0.24420154094696045 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.3798518478870392, + "learning_rate": 2.292849030305722e-05, + "loss": 0.2787, + "step": 13613, + "teacher_loss": 0.2674141526222229 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.8263616561889648, + "learning_rate": 2.2926562165986285e-05, + "loss": 0.3107, + "step": 13614, + "teacher_loss": 0.25343558192253113 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.4937240779399872, + "learning_rate": 2.2924633847183042e-05, + "loss": 0.2351, + "step": 13615, + "teacher_loss": 0.2063518464565277 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.3143256604671478, + "learning_rate": 2.29227053466917e-05, + "loss": 0.3111, + "step": 13616, + "teacher_loss": 0.31070607900619507 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.434296190738678, + "learning_rate": 2.292077666455648e-05, + "loss": 0.2511, + "step": 13617, + "teacher_loss": 0.2307668924331665 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.5667064189910889, + "learning_rate": 2.29188478008216e-05, + "loss": 0.2154, + "step": 13618, + "teacher_loss": 0.17634794116020203 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.5959628820419312, + "learning_rate": 2.291691875553128e-05, + "loss": 0.39, + "step": 13619, + "teacher_loss": 0.3671639859676361 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.49233677983283997, + "learning_rate": 2.291498952872975e-05, + "loss": 0.2882, + "step": 13620, + "teacher_loss": 0.26552340388298035 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.21298331022262573, + "learning_rate": 2.2913060120461244e-05, + "loss": 0.2252, + "step": 13621, + "teacher_loss": 0.22653484344482422 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.5891911387443542, + "learning_rate": 2.2911130530769988e-05, + "loss": 0.3977, + "step": 13622, + "teacher_loss": 0.3764520287513733 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.2632080018520355, + "learning_rate": 2.290920075970023e-05, + "loss": 0.2905, + "step": 13623, + "teacher_loss": 0.29351699352264404 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.28599345684051514, + "learning_rate": 2.2907270807296214e-05, + "loss": 0.146, + "step": 13624, + "teacher_loss": 0.13047613203525543 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.8331006765365601, + "learning_rate": 2.2905340673602184e-05, + "loss": 0.3826, + "step": 13625, + "teacher_loss": 0.3325856029987335 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.3796558380126953, + "learning_rate": 2.2903410358662392e-05, + "loss": 0.4013, + "step": 13626, + "teacher_loss": 0.40374141931533813 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.5125864744186401, + "learning_rate": 2.2901479862521094e-05, + "loss": 0.3082, + "step": 13627, + "teacher_loss": 0.285462886095047 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.42441636323928833, + "learning_rate": 2.2899549185222562e-05, + "loss": 0.2401, + "step": 13628, + "teacher_loss": 0.21959525346755981 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.5916315913200378, + "learning_rate": 2.2897618326811042e-05, + "loss": 0.2817, + "step": 13629, + "teacher_loss": 0.24720904231071472 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.10614937543869019, + "learning_rate": 2.2895687287330817e-05, + "loss": 0.1914, + "step": 13630, + "teacher_loss": 0.20090511441230774 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.6010932922363281, + "learning_rate": 2.2893756066826153e-05, + "loss": 0.251, + "step": 13631, + "teacher_loss": 0.21209104359149933 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.47895514965057373, + "learning_rate": 2.2891824665341333e-05, + "loss": 0.2046, + "step": 13632, + "teacher_loss": 0.17416900396347046 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.7816649675369263, + "learning_rate": 2.288989308292063e-05, + "loss": 0.3334, + "step": 13633, + "teacher_loss": 0.2835521996021271 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.6701295971870422, + "learning_rate": 2.2887961319608335e-05, + "loss": 0.3152, + "step": 13634, + "teacher_loss": 0.2758147716522217 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 1.0728983879089355, + "learning_rate": 2.2886029375448733e-05, + "loss": 0.6157, + "step": 13635, + "teacher_loss": 0.5649242997169495 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.3278043568134308, + "learning_rate": 2.2884097250486127e-05, + "loss": 0.255, + "step": 13636, + "teacher_loss": 0.2469637095928192 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.30848586559295654, + "learning_rate": 2.2882164944764805e-05, + "loss": 0.2144, + "step": 13637, + "teacher_loss": 0.20397846400737762 + }, + { + "compression_loss": 0.0, + "epoch": 2.46, + "label_loss": 0.49018996953964233, + "learning_rate": 2.2880232458329073e-05, + "loss": 0.2669, + "step": 13638, + "teacher_loss": 0.24208678305149078 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.293617308139801, + "learning_rate": 2.287829979122324e-05, + "loss": 0.2031, + "step": 13639, + "teacher_loss": 0.19307485222816467 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.6620434522628784, + "learning_rate": 2.2876366943491606e-05, + "loss": 0.6101, + "step": 13640, + "teacher_loss": 0.604293942451477 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.3760091960430145, + "learning_rate": 2.2874433915178502e-05, + "loss": 0.2106, + "step": 13641, + "teacher_loss": 0.19220757484436035 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.3357684314250946, + "learning_rate": 2.2872500706328234e-05, + "loss": 0.2174, + "step": 13642, + "teacher_loss": 0.20422470569610596 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.7679245471954346, + "learning_rate": 2.287056731698512e-05, + "loss": 0.3028, + "step": 13643, + "teacher_loss": 0.25116145610809326 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.3222368657588959, + "learning_rate": 2.2868633747193503e-05, + "loss": 0.2017, + "step": 13644, + "teacher_loss": 0.18833978474140167 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.35985779762268066, + "learning_rate": 2.28666999969977e-05, + "loss": 0.3384, + "step": 13645, + "teacher_loss": 0.335971474647522 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.2317141890525818, + "learning_rate": 2.2864766066442056e-05, + "loss": 0.1853, + "step": 13646, + "teacher_loss": 0.18008866906166077 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.35469698905944824, + "learning_rate": 2.2862831955570908e-05, + "loss": 0.2761, + "step": 13647, + "teacher_loss": 0.2673995792865753 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.2339063584804535, + "learning_rate": 2.286089766442859e-05, + "loss": 0.2357, + "step": 13648, + "teacher_loss": 0.23589617013931274 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.3616529107093811, + "learning_rate": 2.285896319305946e-05, + "loss": 0.1652, + "step": 13649, + "teacher_loss": 0.14334672689437866 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.6805123686790466, + "learning_rate": 2.2857028541507873e-05, + "loss": 0.3, + "step": 13650, + "teacher_loss": 0.25769805908203125 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.5926121473312378, + "learning_rate": 2.285509370981817e-05, + "loss": 0.4163, + "step": 13651, + "teacher_loss": 0.3967229127883911 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.26732316613197327, + "learning_rate": 2.285315869803472e-05, + "loss": 0.2134, + "step": 13652, + "teacher_loss": 0.2074199914932251 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.23500683903694153, + "learning_rate": 2.2851223506201887e-05, + "loss": 0.1476, + "step": 13653, + "teacher_loss": 0.13788092136383057 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.35115116834640503, + "learning_rate": 2.2849288134364036e-05, + "loss": 0.2364, + "step": 13654, + "teacher_loss": 0.22362536191940308 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.46718618273735046, + "learning_rate": 2.2847352582565547e-05, + "loss": 0.2771, + "step": 13655, + "teacher_loss": 0.2560134828090668 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.316895455121994, + "learning_rate": 2.2845416850850786e-05, + "loss": 0.185, + "step": 13656, + "teacher_loss": 0.17029224336147308 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.46598106622695923, + "learning_rate": 2.2843480939264143e-05, + "loss": 0.2348, + "step": 13657, + "teacher_loss": 0.20910638570785522 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.6750378012657166, + "learning_rate": 2.2841544847849994e-05, + "loss": 0.4622, + "step": 13658, + "teacher_loss": 0.4385721683502197 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.647241473197937, + "learning_rate": 2.2839608576652735e-05, + "loss": 0.2646, + "step": 13659, + "teacher_loss": 0.22206813097000122 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.7658380270004272, + "learning_rate": 2.2837672125716752e-05, + "loss": 0.2504, + "step": 13660, + "teacher_loss": 0.1931057572364807 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.6259948015213013, + "learning_rate": 2.2835735495086446e-05, + "loss": 0.296, + "step": 13661, + "teacher_loss": 0.2593657076358795 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.1222652792930603, + "learning_rate": 2.2833798684806222e-05, + "loss": 0.1589, + "step": 13662, + "teacher_loss": 0.16296085715293884 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.25351467728614807, + "learning_rate": 2.2831861694920478e-05, + "loss": 0.3216, + "step": 13663, + "teacher_loss": 0.32919585704803467 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.3411373496055603, + "learning_rate": 2.2829924525473628e-05, + "loss": 0.2963, + "step": 13664, + "teacher_loss": 0.2913018763065338 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.27812865376472473, + "learning_rate": 2.2827987176510082e-05, + "loss": 0.1447, + "step": 13665, + "teacher_loss": 0.12988203763961792 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.4434473216533661, + "learning_rate": 2.282604964807426e-05, + "loss": 0.2986, + "step": 13666, + "teacher_loss": 0.28251171112060547 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 1.2014683485031128, + "learning_rate": 2.2824111940210587e-05, + "loss": 0.4971, + "step": 13667, + "teacher_loss": 0.4188101887702942 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.592219352722168, + "learning_rate": 2.2822174052963478e-05, + "loss": 0.2979, + "step": 13668, + "teacher_loss": 0.26520490646362305 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.8525668382644653, + "learning_rate": 2.2820235986377376e-05, + "loss": 0.3358, + "step": 13669, + "teacher_loss": 0.2783851623535156 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.6147552728652954, + "learning_rate": 2.2818297740496704e-05, + "loss": 0.212, + "step": 13670, + "teacher_loss": 0.1672194004058838 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.9210973978042603, + "learning_rate": 2.281635931536591e-05, + "loss": 0.4474, + "step": 13671, + "teacher_loss": 0.39478808641433716 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.5139021873474121, + "learning_rate": 2.2814420711029432e-05, + "loss": 0.266, + "step": 13672, + "teacher_loss": 0.23847809433937073 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 1.3612399101257324, + "learning_rate": 2.281248192753171e-05, + "loss": 0.309, + "step": 13673, + "teacher_loss": 0.1921277791261673 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.5665510296821594, + "learning_rate": 2.2810542964917205e-05, + "loss": 0.2636, + "step": 13674, + "teacher_loss": 0.22989961504936218 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.27369365096092224, + "learning_rate": 2.2808603823230368e-05, + "loss": 0.2091, + "step": 13675, + "teacher_loss": 0.20193517208099365 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.2124057561159134, + "learning_rate": 2.2806664502515657e-05, + "loss": 0.1843, + "step": 13676, + "teacher_loss": 0.1811448484659195 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.9935253858566284, + "learning_rate": 2.280472500281753e-05, + "loss": 0.3078, + "step": 13677, + "teacher_loss": 0.23161442577838898 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.5461657643318176, + "learning_rate": 2.2802785324180458e-05, + "loss": 0.3221, + "step": 13678, + "teacher_loss": 0.2972163259983063 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.4159039556980133, + "learning_rate": 2.280084546664892e-05, + "loss": 0.2603, + "step": 13679, + "teacher_loss": 0.2430488020181656 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.4179351329803467, + "learning_rate": 2.279890543026738e-05, + "loss": 0.2169, + "step": 13680, + "teacher_loss": 0.1945488452911377 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.4101651906967163, + "learning_rate": 2.2796965215080317e-05, + "loss": 0.2323, + "step": 13681, + "teacher_loss": 0.21253517270088196 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.3306651711463928, + "learning_rate": 2.2795024821132225e-05, + "loss": 0.1986, + "step": 13682, + "teacher_loss": 0.1839318573474884 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.2958616316318512, + "learning_rate": 2.2793084248467577e-05, + "loss": 0.2224, + "step": 13683, + "teacher_loss": 0.21426919102668762 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.20312482118606567, + "learning_rate": 2.2791143497130878e-05, + "loss": 0.2683, + "step": 13684, + "teacher_loss": 0.2755442261695862 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.3776360750198364, + "learning_rate": 2.2789202567166615e-05, + "loss": 0.328, + "step": 13685, + "teacher_loss": 0.3225070536136627 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.38910961151123047, + "learning_rate": 2.2787261458619292e-05, + "loss": 0.2786, + "step": 13686, + "teacher_loss": 0.2662811875343323 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.39175790548324585, + "learning_rate": 2.278532017153341e-05, + "loss": 0.2521, + "step": 13687, + "teacher_loss": 0.23654311895370483 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.8858953714370728, + "learning_rate": 2.2783378705953477e-05, + "loss": 0.2982, + "step": 13688, + "teacher_loss": 0.2328588217496872 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.5204565525054932, + "learning_rate": 2.278143706192401e-05, + "loss": 0.3268, + "step": 13689, + "teacher_loss": 0.30532315373420715 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.831377387046814, + "learning_rate": 2.2779495239489513e-05, + "loss": 0.3376, + "step": 13690, + "teacher_loss": 0.2827402353286743 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.5325830578804016, + "learning_rate": 2.277755323869452e-05, + "loss": 0.2401, + "step": 13691, + "teacher_loss": 0.20755130052566528 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 1.0558143854141235, + "learning_rate": 2.277561105958355e-05, + "loss": 0.4765, + "step": 13692, + "teacher_loss": 0.41213589906692505 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 1.0601139068603516, + "learning_rate": 2.277366870220113e-05, + "loss": 0.902, + "step": 13693, + "teacher_loss": 0.8844602108001709 + }, + { + "compression_loss": 0.0, + "epoch": 2.47, + "label_loss": 0.25625261664390564, + "learning_rate": 2.2771726166591796e-05, + "loss": 0.2883, + "step": 13694, + "teacher_loss": 0.2918834388256073 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.5181944966316223, + "learning_rate": 2.2769783452800073e-05, + "loss": 0.2375, + "step": 13695, + "teacher_loss": 0.20632264018058777 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.3610687851905823, + "learning_rate": 2.276784056087052e-05, + "loss": 0.2379, + "step": 13696, + "teacher_loss": 0.22426950931549072 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.582071840763092, + "learning_rate": 2.2765897490847668e-05, + "loss": 0.3164, + "step": 13697, + "teacher_loss": 0.28687548637390137 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.18225809931755066, + "learning_rate": 2.2763954242776067e-05, + "loss": 0.1943, + "step": 13698, + "teacher_loss": 0.19561699032783508 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.16500063240528107, + "learning_rate": 2.276201081670028e-05, + "loss": 0.1392, + "step": 13699, + "teacher_loss": 0.13629081845283508 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.24366965889930725, + "learning_rate": 2.2760067212664852e-05, + "loss": 0.1877, + "step": 13700, + "teacher_loss": 0.18144258856773376 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.32557782530784607, + "learning_rate": 2.2758123430714347e-05, + "loss": 0.2748, + "step": 13701, + "teacher_loss": 0.26913610100746155 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.370863139629364, + "learning_rate": 2.275617947089334e-05, + "loss": 0.2356, + "step": 13702, + "teacher_loss": 0.220546692609787 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.35808438062667847, + "learning_rate": 2.2754235333246382e-05, + "loss": 0.2261, + "step": 13703, + "teacher_loss": 0.2114272117614746 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.8877078294754028, + "learning_rate": 2.2752291017818064e-05, + "loss": 0.3735, + "step": 13704, + "teacher_loss": 0.316368043422699 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.3893854022026062, + "learning_rate": 2.2750346524652953e-05, + "loss": 0.2087, + "step": 13705, + "teacher_loss": 0.1886221468448639 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.5065631866455078, + "learning_rate": 2.2748401853795628e-05, + "loss": 0.2736, + "step": 13706, + "teacher_loss": 0.247752383351326 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 1.1485528945922852, + "learning_rate": 2.274645700529069e-05, + "loss": 1.0053, + "step": 13707, + "teacher_loss": 0.9893661141395569 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.6708950996398926, + "learning_rate": 2.274451197918271e-05, + "loss": 0.2983, + "step": 13708, + "teacher_loss": 0.2568681836128235 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.47390109300613403, + "learning_rate": 2.2742566775516287e-05, + "loss": 0.3766, + "step": 13709, + "teacher_loss": 0.36576753854751587 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.24836969375610352, + "learning_rate": 2.2740621394336027e-05, + "loss": 0.1655, + "step": 13710, + "teacher_loss": 0.15625017881393433 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.5187473297119141, + "learning_rate": 2.2738675835686522e-05, + "loss": 0.5512, + "step": 13711, + "teacher_loss": 0.5548242926597595 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.3925529420375824, + "learning_rate": 2.2736730099612387e-05, + "loss": 0.2497, + "step": 13712, + "teacher_loss": 0.2338358759880066 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.27833569049835205, + "learning_rate": 2.2734784186158225e-05, + "loss": 0.196, + "step": 13713, + "teacher_loss": 0.18688370287418365 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.45011603832244873, + "learning_rate": 2.273283809536865e-05, + "loss": 0.2082, + "step": 13714, + "teacher_loss": 0.18135929107666016 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.3631151616573334, + "learning_rate": 2.273089182728828e-05, + "loss": 0.2369, + "step": 13715, + "teacher_loss": 0.22282442450523376 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.37081897258758545, + "learning_rate": 2.2728945381961744e-05, + "loss": 0.2882, + "step": 13716, + "teacher_loss": 0.2789686918258667 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.2927539646625519, + "learning_rate": 2.272699875943366e-05, + "loss": 0.3029, + "step": 13717, + "teacher_loss": 0.3040761649608612 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.22381678223609924, + "learning_rate": 2.2725051959748658e-05, + "loss": 0.1623, + "step": 13718, + "teacher_loss": 0.15544864535331726 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.5651026964187622, + "learning_rate": 2.272310498295138e-05, + "loss": 0.2576, + "step": 13719, + "teacher_loss": 0.2234533429145813 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.35448452830314636, + "learning_rate": 2.2721157829086458e-05, + "loss": 0.3098, + "step": 13720, + "teacher_loss": 0.30482715368270874 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.4623692035675049, + "learning_rate": 2.2719210498198537e-05, + "loss": 0.2901, + "step": 13721, + "teacher_loss": 0.27094146609306335 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.4959922730922699, + "learning_rate": 2.2717262990332266e-05, + "loss": 0.278, + "step": 13722, + "teacher_loss": 0.2537287473678589 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.22338610887527466, + "learning_rate": 2.2715315305532282e-05, + "loss": 0.2336, + "step": 13723, + "teacher_loss": 0.23469537496566772 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.4517146646976471, + "learning_rate": 2.2713367443843263e-05, + "loss": 0.2339, + "step": 13724, + "teacher_loss": 0.20966269075870514 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.44919121265411377, + "learning_rate": 2.2711419405309845e-05, + "loss": 0.2228, + "step": 13725, + "teacher_loss": 0.1976395845413208 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.32712122797966003, + "learning_rate": 2.2709471189976704e-05, + "loss": 0.1793, + "step": 13726, + "teacher_loss": 0.16283798217773438 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.5903319716453552, + "learning_rate": 2.27075227978885e-05, + "loss": 0.3131, + "step": 13727, + "teacher_loss": 0.28224337100982666 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.4159919321537018, + "learning_rate": 2.270557422908991e-05, + "loss": 0.205, + "step": 13728, + "teacher_loss": 0.18154002726078033 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.2592383921146393, + "learning_rate": 2.2703625483625603e-05, + "loss": 0.2099, + "step": 13729, + "teacher_loss": 0.20439793169498444 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.574203372001648, + "learning_rate": 2.2701676561540263e-05, + "loss": 0.4423, + "step": 13730, + "teacher_loss": 0.4276687502861023 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.6093292236328125, + "learning_rate": 2.2699727462878565e-05, + "loss": 0.4333, + "step": 13731, + "teacher_loss": 0.4137030839920044 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.9656829833984375, + "learning_rate": 2.269777818768521e-05, + "loss": 0.2966, + "step": 13732, + "teacher_loss": 0.2222839891910553 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.6082431077957153, + "learning_rate": 2.269582873600487e-05, + "loss": 0.3037, + "step": 13733, + "teacher_loss": 0.26983505487442017 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.5657515525817871, + "learning_rate": 2.2693879107882255e-05, + "loss": 0.303, + "step": 13734, + "teacher_loss": 0.2737973928451538 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.662318229675293, + "learning_rate": 2.2691929303362063e-05, + "loss": 0.6768, + "step": 13735, + "teacher_loss": 0.6783580780029297 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.25294923782348633, + "learning_rate": 2.2689979322488988e-05, + "loss": 0.1719, + "step": 13736, + "teacher_loss": 0.16286274790763855 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.3036515414714813, + "learning_rate": 2.268802916530775e-05, + "loss": 0.2048, + "step": 13737, + "teacher_loss": 0.193766251206398 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.23262326419353485, + "learning_rate": 2.2686078831863044e-05, + "loss": 0.3249, + "step": 13738, + "teacher_loss": 0.335180401802063 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.48552048206329346, + "learning_rate": 2.26841283221996e-05, + "loss": 0.3244, + "step": 13739, + "teacher_loss": 0.30652815103530884 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.48697346448898315, + "learning_rate": 2.268217763636213e-05, + "loss": 0.2754, + "step": 13740, + "teacher_loss": 0.2519321143627167 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.30147188901901245, + "learning_rate": 2.2680226774395357e-05, + "loss": 0.1623, + "step": 13741, + "teacher_loss": 0.14687126874923706 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.20066815614700317, + "learning_rate": 2.2678275736344014e-05, + "loss": 0.1912, + "step": 13742, + "teacher_loss": 0.19010785222053528 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.3933402895927429, + "learning_rate": 2.267632452225283e-05, + "loss": 0.2541, + "step": 13743, + "teacher_loss": 0.23868155479431152 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.21283546090126038, + "learning_rate": 2.267437313216654e-05, + "loss": 0.2109, + "step": 13744, + "teacher_loss": 0.21064089238643646 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.2045329213142395, + "learning_rate": 2.267242156612988e-05, + "loss": 0.2289, + "step": 13745, + "teacher_loss": 0.23158694803714752 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.30674394965171814, + "learning_rate": 2.2670469824187596e-05, + "loss": 0.2202, + "step": 13746, + "teacher_loss": 0.2105536162853241 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.6440914273262024, + "learning_rate": 2.266851790638444e-05, + "loss": 0.2709, + "step": 13747, + "teacher_loss": 0.22947090864181519 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.46461862325668335, + "learning_rate": 2.2666565812765157e-05, + "loss": 0.2194, + "step": 13748, + "teacher_loss": 0.19215229153633118 + }, + { + "compression_loss": 0.0, + "epoch": 2.48, + "label_loss": 0.3110160827636719, + "learning_rate": 2.2664613543374507e-05, + "loss": 0.2044, + "step": 13749, + "teacher_loss": 0.19259901344776154 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.18404620885849, + "learning_rate": 2.2662661098257247e-05, + "loss": 0.1634, + "step": 13750, + "teacher_loss": 0.1611512005329132 + }, + { + "epoch": 2.49, + "eval_exact_match": 79.85808893093662, + "eval_f1": 87.14025084262454, + "step": 13750 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.353465735912323, + "learning_rate": 2.2660708477458144e-05, + "loss": 0.1459, + "step": 13751, + "teacher_loss": 0.12278446555137634 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.37910670042037964, + "learning_rate": 2.265875568102196e-05, + "loss": 0.2143, + "step": 13752, + "teacher_loss": 0.19596660137176514 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.3110623359680176, + "learning_rate": 2.265680270899347e-05, + "loss": 0.2479, + "step": 13753, + "teacher_loss": 0.24086806178092957 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.2679804265499115, + "learning_rate": 2.2654849561417452e-05, + "loss": 0.1729, + "step": 13754, + "teacher_loss": 0.16235005855560303 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.370853066444397, + "learning_rate": 2.2652896238338687e-05, + "loss": 0.2217, + "step": 13755, + "teacher_loss": 0.2051302194595337 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.5036153197288513, + "learning_rate": 2.2650942739801953e-05, + "loss": 0.2392, + "step": 13756, + "teacher_loss": 0.20976772904396057 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.13447241485118866, + "learning_rate": 2.264898906585204e-05, + "loss": 0.1902, + "step": 13757, + "teacher_loss": 0.1964164674282074 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.26214712858200073, + "learning_rate": 2.2647035216533742e-05, + "loss": 0.1683, + "step": 13758, + "teacher_loss": 0.15786007046699524 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.20998218655586243, + "learning_rate": 2.264508119189185e-05, + "loss": 0.2367, + "step": 13759, + "teacher_loss": 0.23972123861312866 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.9091475009918213, + "learning_rate": 2.2643126991971172e-05, + "loss": 0.3433, + "step": 13760, + "teacher_loss": 0.28047046065330505 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.5114991664886475, + "learning_rate": 2.2641172616816507e-05, + "loss": 0.2387, + "step": 13761, + "teacher_loss": 0.2084435373544693 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.610339343547821, + "learning_rate": 2.263921806647266e-05, + "loss": 0.3082, + "step": 13762, + "teacher_loss": 0.2746211886405945 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.8649090528488159, + "learning_rate": 2.2637263340984446e-05, + "loss": 0.2771, + "step": 13763, + "teacher_loss": 0.21173523366451263 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.5670951008796692, + "learning_rate": 2.2635308440396687e-05, + "loss": 0.3814, + "step": 13764, + "teacher_loss": 0.36081233620643616 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.4336152672767639, + "learning_rate": 2.2633353364754194e-05, + "loss": 0.2036, + "step": 13765, + "teacher_loss": 0.17801907658576965 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.34038621187210083, + "learning_rate": 2.2631398114101792e-05, + "loss": 0.2442, + "step": 13766, + "teacher_loss": 0.23353464901447296 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.462745726108551, + "learning_rate": 2.2629442688484316e-05, + "loss": 0.2407, + "step": 13767, + "teacher_loss": 0.21602681279182434 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.5976167917251587, + "learning_rate": 2.262748708794659e-05, + "loss": 0.3463, + "step": 13768, + "teacher_loss": 0.31833136081695557 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.34929579496383667, + "learning_rate": 2.2625531312533457e-05, + "loss": 0.2755, + "step": 13769, + "teacher_loss": 0.2672916054725647 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.23493260145187378, + "learning_rate": 2.262357536228975e-05, + "loss": 0.2202, + "step": 13770, + "teacher_loss": 0.21853691339492798 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.5107652544975281, + "learning_rate": 2.2621619237260318e-05, + "loss": 0.2521, + "step": 13771, + "teacher_loss": 0.22330498695373535 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 1.034281611442566, + "learning_rate": 2.2619662937490004e-05, + "loss": 0.3058, + "step": 13772, + "teacher_loss": 0.22491145133972168 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.6619790196418762, + "learning_rate": 2.2617706463023668e-05, + "loss": 0.3018, + "step": 13773, + "teacher_loss": 0.2617333233356476 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.33394694328308105, + "learning_rate": 2.2615749813906158e-05, + "loss": 0.2634, + "step": 13774, + "teacher_loss": 0.25554752349853516 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.42445796728134155, + "learning_rate": 2.261379299018234e-05, + "loss": 0.1992, + "step": 13775, + "teacher_loss": 0.17422622442245483 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.49240079522132874, + "learning_rate": 2.261183599189708e-05, + "loss": 0.4841, + "step": 13776, + "teacher_loss": 0.48315268754959106 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.8021292686462402, + "learning_rate": 2.2609878819095237e-05, + "loss": 0.3014, + "step": 13777, + "teacher_loss": 0.24581149220466614 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.5289261341094971, + "learning_rate": 2.2607921471821697e-05, + "loss": 0.2824, + "step": 13778, + "teacher_loss": 0.254984587430954 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.5143373012542725, + "learning_rate": 2.2605963950121315e-05, + "loss": 0.2222, + "step": 13779, + "teacher_loss": 0.18969523906707764 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.6002171635627747, + "learning_rate": 2.260400625403899e-05, + "loss": 0.3024, + "step": 13780, + "teacher_loss": 0.26932841539382935 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.9642322063446045, + "learning_rate": 2.26020483836196e-05, + "loss": 0.2991, + "step": 13781, + "teacher_loss": 0.22516149282455444 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.4277884364128113, + "learning_rate": 2.260009033890803e-05, + "loss": 0.2518, + "step": 13782, + "teacher_loss": 0.2322588562965393 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.3205012381076813, + "learning_rate": 2.2598132119949176e-05, + "loss": 0.2112, + "step": 13783, + "teacher_loss": 0.19909392297267914 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.34331250190734863, + "learning_rate": 2.259617372678793e-05, + "loss": 0.2927, + "step": 13784, + "teacher_loss": 0.2870316207408905 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.542759358882904, + "learning_rate": 2.2594215159469205e-05, + "loss": 0.2207, + "step": 13785, + "teacher_loss": 0.18489298224449158 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.9033209085464478, + "learning_rate": 2.259225641803788e-05, + "loss": 0.4198, + "step": 13786, + "teacher_loss": 0.36607107520103455 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.3481941223144531, + "learning_rate": 2.2590297502538896e-05, + "loss": 0.2074, + "step": 13787, + "teacher_loss": 0.19178421795368195 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.5806370973587036, + "learning_rate": 2.2588338413017133e-05, + "loss": 0.2218, + "step": 13788, + "teacher_loss": 0.18187814950942993 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 1.3168507814407349, + "learning_rate": 2.2586379149517528e-05, + "loss": 0.5132, + "step": 13789, + "teacher_loss": 0.4238593876361847 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.18170928955078125, + "learning_rate": 2.2584419712084992e-05, + "loss": 0.2235, + "step": 13790, + "teacher_loss": 0.2281537652015686 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.3284096419811249, + "learning_rate": 2.2582460100764445e-05, + "loss": 0.2623, + "step": 13791, + "teacher_loss": 0.2549816966056824 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.5958716869354248, + "learning_rate": 2.2580500315600832e-05, + "loss": 0.3446, + "step": 13792, + "teacher_loss": 0.3167067766189575 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.34571486711502075, + "learning_rate": 2.257854035663907e-05, + "loss": 0.2342, + "step": 13793, + "teacher_loss": 0.2218383550643921 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.7008187174797058, + "learning_rate": 2.25765802239241e-05, + "loss": 0.36, + "step": 13794, + "teacher_loss": 0.3221738934516907 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.32773566246032715, + "learning_rate": 2.2574619917500858e-05, + "loss": 0.2589, + "step": 13795, + "teacher_loss": 0.25120246410369873 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.5814257264137268, + "learning_rate": 2.2572659437414295e-05, + "loss": 0.2812, + "step": 13796, + "teacher_loss": 0.24784106016159058 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.44662514328956604, + "learning_rate": 2.2570698783709355e-05, + "loss": 0.2275, + "step": 13797, + "teacher_loss": 0.20317766070365906 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.7091720700263977, + "learning_rate": 2.2568737956430987e-05, + "loss": 0.278, + "step": 13798, + "teacher_loss": 0.23007085919380188 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.5034940242767334, + "learning_rate": 2.256677695562415e-05, + "loss": 0.3261, + "step": 13799, + "teacher_loss": 0.30634692311286926 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.14518913626670837, + "learning_rate": 2.2564815781333805e-05, + "loss": 0.155, + "step": 13800, + "teacher_loss": 0.15605610609054565 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.302400678396225, + "learning_rate": 2.2562854433604915e-05, + "loss": 0.2485, + "step": 13801, + "teacher_loss": 0.2425551414489746 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.26147955656051636, + "learning_rate": 2.2560892912482445e-05, + "loss": 0.2623, + "step": 13802, + "teacher_loss": 0.26239651441574097 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.3270567059516907, + "learning_rate": 2.2558931218011376e-05, + "loss": 0.2503, + "step": 13803, + "teacher_loss": 0.2417515218257904 + }, + { + "compression_loss": 0.0, + "epoch": 2.49, + "label_loss": 0.31223657727241516, + "learning_rate": 2.2556969350236668e-05, + "loss": 0.2259, + "step": 13804, + "teacher_loss": 0.21632379293441772 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.16385126113891602, + "learning_rate": 2.255500730920332e-05, + "loss": 0.1852, + "step": 13805, + "teacher_loss": 0.18751777708530426 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.9464585781097412, + "learning_rate": 2.25530450949563e-05, + "loss": 0.3604, + "step": 13806, + "teacher_loss": 0.29525020718574524 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.5212752819061279, + "learning_rate": 2.25510827075406e-05, + "loss": 0.2523, + "step": 13807, + "teacher_loss": 0.22245824337005615 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.36930155754089355, + "learning_rate": 2.254912014700121e-05, + "loss": 0.2137, + "step": 13808, + "teacher_loss": 0.19640091061592102 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.7803667783737183, + "learning_rate": 2.254715741338313e-05, + "loss": 0.421, + "step": 13809, + "teacher_loss": 0.38107338547706604 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.5447787642478943, + "learning_rate": 2.2545194506731365e-05, + "loss": 0.3626, + "step": 13810, + "teacher_loss": 0.3423329293727875 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.17876482009887695, + "learning_rate": 2.2543231427090908e-05, + "loss": 0.2377, + "step": 13811, + "teacher_loss": 0.2442304939031601 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.32919400930404663, + "learning_rate": 2.2541268174506768e-05, + "loss": 0.2341, + "step": 13812, + "teacher_loss": 0.22353880107402802 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.5688897967338562, + "learning_rate": 2.2539304749023958e-05, + "loss": 0.4087, + "step": 13813, + "teacher_loss": 0.39087629318237305 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.6939910650253296, + "learning_rate": 2.2537341150687497e-05, + "loss": 0.2913, + "step": 13814, + "teacher_loss": 0.2465360164642334 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.44366300106048584, + "learning_rate": 2.25353773795424e-05, + "loss": 0.3027, + "step": 13815, + "teacher_loss": 0.2869938015937805 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.9140196442604065, + "learning_rate": 2.2533413435633692e-05, + "loss": 0.3869, + "step": 13816, + "teacher_loss": 0.3283763527870178 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.40716859698295593, + "learning_rate": 2.25314493190064e-05, + "loss": 0.2227, + "step": 13817, + "teacher_loss": 0.20223474502563477 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.27802783250808716, + "learning_rate": 2.2529485029705558e-05, + "loss": 0.2661, + "step": 13818, + "teacher_loss": 0.26473915576934814 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.5900223255157471, + "learning_rate": 2.2527520567776195e-05, + "loss": 0.2364, + "step": 13819, + "teacher_loss": 0.19709137082099915 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.40474605560302734, + "learning_rate": 2.2525555933263354e-05, + "loss": 0.2599, + "step": 13820, + "teacher_loss": 0.24381205439567566 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.433512806892395, + "learning_rate": 2.2523591126212084e-05, + "loss": 0.2123, + "step": 13821, + "teacher_loss": 0.1876736283302307 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.40282338857650757, + "learning_rate": 2.2521626146667423e-05, + "loss": 0.285, + "step": 13822, + "teacher_loss": 0.2719587981700897 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.3457735478878021, + "learning_rate": 2.2519660994674423e-05, + "loss": 0.297, + "step": 13823, + "teacher_loss": 0.2915331721305847 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 1.319657802581787, + "learning_rate": 2.2517695670278143e-05, + "loss": 0.3618, + "step": 13824, + "teacher_loss": 0.2554050385951996 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.857620120048523, + "learning_rate": 2.2515730173523645e-05, + "loss": 0.5446, + "step": 13825, + "teacher_loss": 0.5097886919975281 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.3763587474822998, + "learning_rate": 2.251376450445598e-05, + "loss": 0.2912, + "step": 13826, + "teacher_loss": 0.28168806433677673 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.39884382486343384, + "learning_rate": 2.251179866312023e-05, + "loss": 0.2826, + "step": 13827, + "teacher_loss": 0.2696320712566376 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.3768623173236847, + "learning_rate": 2.2509832649561453e-05, + "loss": 0.2883, + "step": 13828, + "teacher_loss": 0.2784094512462616 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.22470203042030334, + "learning_rate": 2.2507866463824727e-05, + "loss": 0.2636, + "step": 13829, + "teacher_loss": 0.26796603202819824 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.21355083584785461, + "learning_rate": 2.250590010595514e-05, + "loss": 0.2247, + "step": 13830, + "teacher_loss": 0.22596681118011475 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.7543628215789795, + "learning_rate": 2.2503933575997763e-05, + "loss": 0.4384, + "step": 13831, + "teacher_loss": 0.403306245803833 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.3063744306564331, + "learning_rate": 2.2501966873997685e-05, + "loss": 0.2187, + "step": 13832, + "teacher_loss": 0.20897531509399414 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.7782771587371826, + "learning_rate": 2.25e-05, + "loss": 0.4179, + "step": 13833, + "teacher_loss": 0.37787577509880066 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.5411162972450256, + "learning_rate": 2.24980329540498e-05, + "loss": 0.5695, + "step": 13834, + "teacher_loss": 0.5727081298828125 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.4747687578201294, + "learning_rate": 2.249606573619219e-05, + "loss": 0.2324, + "step": 13835, + "teacher_loss": 0.2054956555366516 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.33572036027908325, + "learning_rate": 2.2494098346472264e-05, + "loss": 0.3275, + "step": 13836, + "teacher_loss": 0.32656246423721313 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.2985009253025055, + "learning_rate": 2.249213078493513e-05, + "loss": 0.161, + "step": 13837, + "teacher_loss": 0.14569967985153198 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.7003904581069946, + "learning_rate": 2.2490163051625898e-05, + "loss": 0.2573, + "step": 13838, + "teacher_loss": 0.20812034606933594 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.27782052755355835, + "learning_rate": 2.248819514658969e-05, + "loss": 0.1816, + "step": 13839, + "teacher_loss": 0.1709585040807724 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.3467024564743042, + "learning_rate": 2.2486227069871614e-05, + "loss": 0.3182, + "step": 13840, + "teacher_loss": 0.3149855434894562 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.4810560345649719, + "learning_rate": 2.24842588215168e-05, + "loss": 0.2754, + "step": 13841, + "teacher_loss": 0.25256556272506714 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.627578854560852, + "learning_rate": 2.2482290401570368e-05, + "loss": 0.2671, + "step": 13842, + "teacher_loss": 0.22709615528583527 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.2952554225921631, + "learning_rate": 2.2480321810077445e-05, + "loss": 0.2107, + "step": 13843, + "teacher_loss": 0.20135092735290527 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.6124990582466125, + "learning_rate": 2.2478353047083176e-05, + "loss": 0.4082, + "step": 13844, + "teacher_loss": 0.3854805529117584 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.5393595695495605, + "learning_rate": 2.2476384112632692e-05, + "loss": 0.3305, + "step": 13845, + "teacher_loss": 0.3072936534881592 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.3037719130516052, + "learning_rate": 2.247441500677113e-05, + "loss": 0.1813, + "step": 13846, + "teacher_loss": 0.16771341860294342 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.5305928587913513, + "learning_rate": 2.247244572954365e-05, + "loss": 0.364, + "step": 13847, + "teacher_loss": 0.34546053409576416 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.667974591255188, + "learning_rate": 2.247047628099539e-05, + "loss": 0.3568, + "step": 13848, + "teacher_loss": 0.3222300410270691 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.34892773628234863, + "learning_rate": 2.246850666117151e-05, + "loss": 0.2988, + "step": 13849, + "teacher_loss": 0.29325512051582336 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.1705968976020813, + "learning_rate": 2.2466536870117157e-05, + "loss": 0.2039, + "step": 13850, + "teacher_loss": 0.20757699012756348 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.506560742855072, + "learning_rate": 2.24645669078775e-05, + "loss": 0.5226, + "step": 13851, + "teacher_loss": 0.5243268013000488 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.969771146774292, + "learning_rate": 2.2462596774497707e-05, + "loss": 0.5286, + "step": 13852, + "teacher_loss": 0.4795331358909607 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.2480800300836563, + "learning_rate": 2.2460626470022944e-05, + "loss": 0.2472, + "step": 13853, + "teacher_loss": 0.24715490639209747 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.615435004234314, + "learning_rate": 2.245865599449838e-05, + "loss": 0.3108, + "step": 13854, + "teacher_loss": 0.27692246437072754 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.3770451247692108, + "learning_rate": 2.2456685347969206e-05, + "loss": 0.2258, + "step": 13855, + "teacher_loss": 0.20894969999790192 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.4612610936164856, + "learning_rate": 2.2454714530480582e-05, + "loss": 0.3029, + "step": 13856, + "teacher_loss": 0.28530219197273254 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.4208654761314392, + "learning_rate": 2.2452743542077712e-05, + "loss": 0.283, + "step": 13857, + "teacher_loss": 0.26767972111701965 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.23390749096870422, + "learning_rate": 2.245077238280577e-05, + "loss": 0.1964, + "step": 13858, + "teacher_loss": 0.19224053621292114 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.11860658973455429, + "learning_rate": 2.244880105270996e-05, + "loss": 0.157, + "step": 13859, + "teacher_loss": 0.16124776005744934 + }, + { + "compression_loss": 0.0, + "epoch": 2.5, + "label_loss": 0.44247114658355713, + "learning_rate": 2.2446829551835476e-05, + "loss": 0.2532, + "step": 13860, + "teacher_loss": 0.232208251953125 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.3132500648498535, + "learning_rate": 2.2444857880227516e-05, + "loss": 0.2303, + "step": 13861, + "teacher_loss": 0.2211143523454666 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.7542303800582886, + "learning_rate": 2.2442886037931284e-05, + "loss": 0.2713, + "step": 13862, + "teacher_loss": 0.2176954448223114 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.2016574889421463, + "learning_rate": 2.2440914024991994e-05, + "loss": 0.2009, + "step": 13863, + "teacher_loss": 0.20086276531219482 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.5363192558288574, + "learning_rate": 2.2438941841454847e-05, + "loss": 0.3049, + "step": 13864, + "teacher_loss": 0.27919143438339233 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.5406144261360168, + "learning_rate": 2.2436969487365073e-05, + "loss": 0.313, + "step": 13865, + "teacher_loss": 0.2877134084701538 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.419107586145401, + "learning_rate": 2.2434996962767884e-05, + "loss": 0.2735, + "step": 13866, + "teacher_loss": 0.25729072093963623 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.3836519122123718, + "learning_rate": 2.2433024267708506e-05, + "loss": 0.2202, + "step": 13867, + "teacher_loss": 0.2020922601222992 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.38623061776161194, + "learning_rate": 2.2431051402232164e-05, + "loss": 0.2425, + "step": 13868, + "teacher_loss": 0.22649943828582764 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.6129484176635742, + "learning_rate": 2.2429078366384096e-05, + "loss": 0.2391, + "step": 13869, + "teacher_loss": 0.19754809141159058 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.6627760529518127, + "learning_rate": 2.2427105160209534e-05, + "loss": 0.4407, + "step": 13870, + "teacher_loss": 0.41598284244537354 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.4372401535511017, + "learning_rate": 2.2425131783753723e-05, + "loss": 0.2835, + "step": 13871, + "teacher_loss": 0.26644349098205566 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.2921396493911743, + "learning_rate": 2.24231582370619e-05, + "loss": 0.22, + "step": 13872, + "teacher_loss": 0.21194377541542053 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.4336320757865906, + "learning_rate": 2.242118452017931e-05, + "loss": 0.278, + "step": 13873, + "teacher_loss": 0.2607576251029968 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.339330792427063, + "learning_rate": 2.241921063315121e-05, + "loss": 0.1971, + "step": 13874, + "teacher_loss": 0.18126636743545532 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.24718916416168213, + "learning_rate": 2.2417236576022856e-05, + "loss": 0.4253, + "step": 13875, + "teacher_loss": 0.44512125849723816 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.3884221315383911, + "learning_rate": 2.2415262348839503e-05, + "loss": 0.3467, + "step": 13876, + "teacher_loss": 0.3421027958393097 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.813828706741333, + "learning_rate": 2.2413287951646418e-05, + "loss": 0.3237, + "step": 13877, + "teacher_loss": 0.26923543214797974 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.8708653450012207, + "learning_rate": 2.2411313384488864e-05, + "loss": 0.3716, + "step": 13878, + "teacher_loss": 0.3160707354545593 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.4535222351551056, + "learning_rate": 2.2409338647412116e-05, + "loss": 0.2606, + "step": 13879, + "teacher_loss": 0.23920580744743347 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.4159276485443115, + "learning_rate": 2.2407363740461448e-05, + "loss": 0.1893, + "step": 13880, + "teacher_loss": 0.16413141787052155 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.26848524808883667, + "learning_rate": 2.2405388663682137e-05, + "loss": 0.1855, + "step": 13881, + "teacher_loss": 0.17623446881771088 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.702314555644989, + "learning_rate": 2.240341341711947e-05, + "loss": 0.2934, + "step": 13882, + "teacher_loss": 0.2480011284351349 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.6371479034423828, + "learning_rate": 2.2401438000818724e-05, + "loss": 0.3352, + "step": 13883, + "teacher_loss": 0.3015964925289154 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.5277643203735352, + "learning_rate": 2.2399462414825195e-05, + "loss": 0.3026, + "step": 13884, + "teacher_loss": 0.2776230573654175 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.3296002447605133, + "learning_rate": 2.2397486659184186e-05, + "loss": 0.2327, + "step": 13885, + "teacher_loss": 0.2219845950603485 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.238526850938797, + "learning_rate": 2.239551073394098e-05, + "loss": 0.2698, + "step": 13886, + "teacher_loss": 0.2733253240585327 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.6469662189483643, + "learning_rate": 2.239353463914089e-05, + "loss": 0.3707, + "step": 13887, + "teacher_loss": 0.3399810194969177 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.5941764116287231, + "learning_rate": 2.2391558374829212e-05, + "loss": 0.3402, + "step": 13888, + "teacher_loss": 0.31203338503837585 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.37006253004074097, + "learning_rate": 2.2389581941051264e-05, + "loss": 0.2107, + "step": 13889, + "teacher_loss": 0.19299374520778656 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.4980618357658386, + "learning_rate": 2.2387605337852358e-05, + "loss": 0.3494, + "step": 13890, + "teacher_loss": 0.3328579068183899 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.28581681847572327, + "learning_rate": 2.2385628565277813e-05, + "loss": 0.2599, + "step": 13891, + "teacher_loss": 0.2570514678955078 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.9599497318267822, + "learning_rate": 2.2383651623372945e-05, + "loss": 0.2801, + "step": 13892, + "teacher_loss": 0.20454248785972595 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.2410169094800949, + "learning_rate": 2.2381674512183085e-05, + "loss": 0.2174, + "step": 13893, + "teacher_loss": 0.21477477252483368 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.5100368857383728, + "learning_rate": 2.2379697231753554e-05, + "loss": 0.3211, + "step": 13894, + "teacher_loss": 0.30011799931526184 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.3362150192260742, + "learning_rate": 2.2377719782129698e-05, + "loss": 0.2298, + "step": 13895, + "teacher_loss": 0.21792559325695038 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.46865421533584595, + "learning_rate": 2.2375742163356844e-05, + "loss": 0.2404, + "step": 13896, + "teacher_loss": 0.21506252884864807 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.6836999654769897, + "learning_rate": 2.2373764375480335e-05, + "loss": 0.261, + "step": 13897, + "teacher_loss": 0.21405695378780365 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.29324865341186523, + "learning_rate": 2.2371786418545515e-05, + "loss": 0.268, + "step": 13898, + "teacher_loss": 0.26519161462783813 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.2970011830329895, + "learning_rate": 2.236980829259774e-05, + "loss": 0.297, + "step": 13899, + "teacher_loss": 0.29696834087371826 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.2556993365287781, + "learning_rate": 2.2367829997682353e-05, + "loss": 0.2167, + "step": 13900, + "teacher_loss": 0.21231606602668762 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.3662340044975281, + "learning_rate": 2.2365851533844715e-05, + "loss": 0.2837, + "step": 13901, + "teacher_loss": 0.27453556656837463 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.22476446628570557, + "learning_rate": 2.236387290113018e-05, + "loss": 0.2005, + "step": 13902, + "teacher_loss": 0.197752445936203 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.5312143564224243, + "learning_rate": 2.2361894099584126e-05, + "loss": 0.297, + "step": 13903, + "teacher_loss": 0.27095258235931396 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.48935821652412415, + "learning_rate": 2.2359915129251905e-05, + "loss": 0.3174, + "step": 13904, + "teacher_loss": 0.29834309220314026 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.5989590287208557, + "learning_rate": 2.2357935990178904e-05, + "loss": 0.2785, + "step": 13905, + "teacher_loss": 0.2429085671901703 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.3596482276916504, + "learning_rate": 2.2355956682410485e-05, + "loss": 0.2428, + "step": 13906, + "teacher_loss": 0.2297956645488739 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.3609699308872223, + "learning_rate": 2.2353977205992036e-05, + "loss": 0.232, + "step": 13907, + "teacher_loss": 0.21771734952926636 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.6307642459869385, + "learning_rate": 2.2351997560968935e-05, + "loss": 0.4019, + "step": 13908, + "teacher_loss": 0.376476526260376 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.689670979976654, + "learning_rate": 2.2350017747386573e-05, + "loss": 0.2795, + "step": 13909, + "teacher_loss": 0.23390254378318787 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.6576633453369141, + "learning_rate": 2.2348037765290346e-05, + "loss": 0.3032, + "step": 13910, + "teacher_loss": 0.26383835077285767 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.6056357622146606, + "learning_rate": 2.234605761472564e-05, + "loss": 0.2608, + "step": 13911, + "teacher_loss": 0.2225216180086136 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.7158815264701843, + "learning_rate": 2.2344077295737856e-05, + "loss": 0.383, + "step": 13912, + "teacher_loss": 0.345986008644104 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.18104025721549988, + "learning_rate": 2.2342096808372398e-05, + "loss": 0.2508, + "step": 13913, + "teacher_loss": 0.25856393575668335 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.3073023855686188, + "learning_rate": 2.2340116152674677e-05, + "loss": 0.2601, + "step": 13914, + "teacher_loss": 0.25490254163742065 + }, + { + "compression_loss": 0.0, + "epoch": 2.51, + "label_loss": 0.17231950163841248, + "learning_rate": 2.23381353286901e-05, + "loss": 0.1967, + "step": 13915, + "teacher_loss": 0.19945842027664185 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.5917526483535767, + "learning_rate": 2.2336154336464074e-05, + "loss": 0.2995, + "step": 13916, + "teacher_loss": 0.26708242297172546 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.6367902755737305, + "learning_rate": 2.233417317604203e-05, + "loss": 0.3458, + "step": 13917, + "teacher_loss": 0.3134750425815582 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.48613840341567993, + "learning_rate": 2.2332191847469384e-05, + "loss": 0.2938, + "step": 13918, + "teacher_loss": 0.2724721133708954 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.7414456605911255, + "learning_rate": 2.2330210350791555e-05, + "loss": 0.3253, + "step": 13919, + "teacher_loss": 0.2790381908416748 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.5379503965377808, + "learning_rate": 2.2328228686053987e-05, + "loss": 0.2255, + "step": 13920, + "teacher_loss": 0.19075697660446167 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.44399380683898926, + "learning_rate": 2.23262468533021e-05, + "loss": 0.2911, + "step": 13921, + "teacher_loss": 0.2741544246673584 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.540751576423645, + "learning_rate": 2.2324264852581346e-05, + "loss": 0.266, + "step": 13922, + "teacher_loss": 0.2355162352323532 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.2988436222076416, + "learning_rate": 2.2322282683937155e-05, + "loss": 0.2501, + "step": 13923, + "teacher_loss": 0.2446373552083969 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.4422075152397156, + "learning_rate": 2.232030034741497e-05, + "loss": 0.6024, + "step": 13924, + "teacher_loss": 0.6202346086502075 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.38594767451286316, + "learning_rate": 2.2318317843060254e-05, + "loss": 0.1837, + "step": 13925, + "teacher_loss": 0.1612045168876648 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.47010648250579834, + "learning_rate": 2.2316335170918446e-05, + "loss": 0.3128, + "step": 13926, + "teacher_loss": 0.29532408714294434 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.4728595018386841, + "learning_rate": 2.2314352331035007e-05, + "loss": 0.2786, + "step": 13927, + "teacher_loss": 0.257002592086792 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.4968397617340088, + "learning_rate": 2.23123693234554e-05, + "loss": 0.3283, + "step": 13928, + "teacher_loss": 0.3095853328704834 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.4015643298625946, + "learning_rate": 2.231038614822509e-05, + "loss": 0.2228, + "step": 13929, + "teacher_loss": 0.20291663706302643 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.4670286774635315, + "learning_rate": 2.2308402805389545e-05, + "loss": 0.2843, + "step": 13930, + "teacher_loss": 0.2640414237976074 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.40011703968048096, + "learning_rate": 2.230641929499423e-05, + "loss": 0.2087, + "step": 13931, + "teacher_loss": 0.18744783103466034 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.5586017966270447, + "learning_rate": 2.230443561708463e-05, + "loss": 0.3682, + "step": 13932, + "teacher_loss": 0.34705692529678345 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.5576722621917725, + "learning_rate": 2.230245177170622e-05, + "loss": 0.2571, + "step": 13933, + "teacher_loss": 0.2236793488264084 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.4924560487270355, + "learning_rate": 2.2300467758904486e-05, + "loss": 0.3365, + "step": 13934, + "teacher_loss": 0.3192119002342224 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.47274962067604065, + "learning_rate": 2.2298483578724914e-05, + "loss": 0.2099, + "step": 13935, + "teacher_loss": 0.18071559071540833 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.4030134081840515, + "learning_rate": 2.229649923121299e-05, + "loss": 0.3484, + "step": 13936, + "teacher_loss": 0.3423698842525482 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.283250629901886, + "learning_rate": 2.229451471641422e-05, + "loss": 0.2776, + "step": 13937, + "teacher_loss": 0.27701130509376526 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.5338020324707031, + "learning_rate": 2.2292530034374097e-05, + "loss": 0.2752, + "step": 13938, + "teacher_loss": 0.24645012617111206 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.8010367155075073, + "learning_rate": 2.2290545185138125e-05, + "loss": 0.2999, + "step": 13939, + "teacher_loss": 0.24420180916786194 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.6379849910736084, + "learning_rate": 2.2288560168751812e-05, + "loss": 0.2999, + "step": 13940, + "teacher_loss": 0.26234519481658936 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.6040122509002686, + "learning_rate": 2.228657498526066e-05, + "loss": 0.2929, + "step": 13941, + "teacher_loss": 0.2582801878452301 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.2518012523651123, + "learning_rate": 2.2284589634710197e-05, + "loss": 0.3108, + "step": 13942, + "teacher_loss": 0.31741055846214294 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.4290907680988312, + "learning_rate": 2.2282604117145934e-05, + "loss": 0.2254, + "step": 13943, + "teacher_loss": 0.2027406394481659 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.2751498818397522, + "learning_rate": 2.2280618432613385e-05, + "loss": 0.2062, + "step": 13944, + "teacher_loss": 0.19855996966362 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.6176972985267639, + "learning_rate": 2.2278632581158095e-05, + "loss": 0.2512, + "step": 13945, + "teacher_loss": 0.21047306060791016 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.4580097794532776, + "learning_rate": 2.2276646562825572e-05, + "loss": 0.3946, + "step": 13946, + "teacher_loss": 0.387581467628479 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.2912094295024872, + "learning_rate": 2.227466037766136e-05, + "loss": 0.2476, + "step": 13947, + "teacher_loss": 0.24274703860282898 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.6111304759979248, + "learning_rate": 2.2272674025711004e-05, + "loss": 0.326, + "step": 13948, + "teacher_loss": 0.2942809760570526 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.19553899765014648, + "learning_rate": 2.227068750702003e-05, + "loss": 0.2301, + "step": 13949, + "teacher_loss": 0.23393535614013672 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.5711072683334351, + "learning_rate": 2.2268700821633996e-05, + "loss": 0.2438, + "step": 13950, + "teacher_loss": 0.207398921251297 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.26664450764656067, + "learning_rate": 2.2266713969598442e-05, + "loss": 0.1735, + "step": 13951, + "teacher_loss": 0.16310890018939972 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.5667017102241516, + "learning_rate": 2.226472695095892e-05, + "loss": 0.3738, + "step": 13952, + "teacher_loss": 0.3523963391780853 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.601837158203125, + "learning_rate": 2.2262739765761e-05, + "loss": 0.4228, + "step": 13953, + "teacher_loss": 0.40289610624313354 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 1.0174680948257446, + "learning_rate": 2.226075241405022e-05, + "loss": 0.3578, + "step": 13954, + "teacher_loss": 0.28445762395858765 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.324079304933548, + "learning_rate": 2.225876489587216e-05, + "loss": 0.1981, + "step": 13955, + "teacher_loss": 0.18404968082904816 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.38662248849868774, + "learning_rate": 2.225677721127239e-05, + "loss": 0.3806, + "step": 13956, + "teacher_loss": 0.37989723682403564 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.5130556225776672, + "learning_rate": 2.2254789360296468e-05, + "loss": 0.2628, + "step": 13957, + "teacher_loss": 0.2349393665790558 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.2537984549999237, + "learning_rate": 2.225280134298998e-05, + "loss": 0.1561, + "step": 13958, + "teacher_loss": 0.1452627182006836 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.2435506284236908, + "learning_rate": 2.2250813159398505e-05, + "loss": 0.1902, + "step": 13959, + "teacher_loss": 0.18429747223854065 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.5710318088531494, + "learning_rate": 2.2248824809567618e-05, + "loss": 0.27, + "step": 13960, + "teacher_loss": 0.23659634590148926 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.48236602544784546, + "learning_rate": 2.224683629354291e-05, + "loss": 0.2298, + "step": 13961, + "teacher_loss": 0.20179128646850586 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.5113157629966736, + "learning_rate": 2.224484761136998e-05, + "loss": 0.2497, + "step": 13962, + "teacher_loss": 0.22058531641960144 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.11343131959438324, + "learning_rate": 2.224285876309441e-05, + "loss": 0.254, + "step": 13963, + "teacher_loss": 0.2695688307285309 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.33425742387771606, + "learning_rate": 2.224086974876181e-05, + "loss": 0.1897, + "step": 13964, + "teacher_loss": 0.17364037036895752 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.4697314500808716, + "learning_rate": 2.223888056841777e-05, + "loss": 0.3836, + "step": 13965, + "teacher_loss": 0.37405329942703247 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.1945856213569641, + "learning_rate": 2.22368912221079e-05, + "loss": 0.184, + "step": 13966, + "teacher_loss": 0.18284344673156738 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.21014806628227234, + "learning_rate": 2.2234901709877814e-05, + "loss": 0.2664, + "step": 13967, + "teacher_loss": 0.2726455628871918 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 1.305837631225586, + "learning_rate": 2.223291203177313e-05, + "loss": 0.385, + "step": 13968, + "teacher_loss": 0.2826395034790039 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.2691567540168762, + "learning_rate": 2.2230922187839447e-05, + "loss": 0.1736, + "step": 13969, + "teacher_loss": 0.16298556327819824 + }, + { + "compression_loss": 0.0, + "epoch": 2.52, + "label_loss": 0.4690394997596741, + "learning_rate": 2.2228932178122407e-05, + "loss": 0.1872, + "step": 13970, + "teacher_loss": 0.1558523178100586 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.2960183024406433, + "learning_rate": 2.2226942002667622e-05, + "loss": 0.375, + "step": 13971, + "teacher_loss": 0.38380488753318787 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.4008580446243286, + "learning_rate": 2.2224951661520718e-05, + "loss": 0.3499, + "step": 13972, + "teacher_loss": 0.3442028760910034 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.6065565347671509, + "learning_rate": 2.2222961154727346e-05, + "loss": 0.2717, + "step": 13973, + "teacher_loss": 0.23452654480934143 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.4173336327075958, + "learning_rate": 2.222097048233312e-05, + "loss": 0.2276, + "step": 13974, + "teacher_loss": 0.2064652442932129 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.6351914405822754, + "learning_rate": 2.22189796443837e-05, + "loss": 0.4771, + "step": 13975, + "teacher_loss": 0.4594815969467163 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.8871963620185852, + "learning_rate": 2.2216988640924716e-05, + "loss": 0.3788, + "step": 13976, + "teacher_loss": 0.3223353624343872 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 1.298387050628662, + "learning_rate": 2.2214997472001814e-05, + "loss": 0.8011, + "step": 13977, + "teacher_loss": 0.7458642721176147 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.23911911249160767, + "learning_rate": 2.221300613766066e-05, + "loss": 0.1841, + "step": 13978, + "teacher_loss": 0.1779319941997528 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.8469879627227783, + "learning_rate": 2.2211014637946896e-05, + "loss": 0.5976, + "step": 13979, + "teacher_loss": 0.5698357820510864 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.7309197187423706, + "learning_rate": 2.220902297290619e-05, + "loss": 0.2763, + "step": 13980, + "teacher_loss": 0.22574323415756226 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.4994797110557556, + "learning_rate": 2.22070311425842e-05, + "loss": 0.2166, + "step": 13981, + "teacher_loss": 0.18515023589134216 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.7803881764411926, + "learning_rate": 2.220503914702659e-05, + "loss": 0.4206, + "step": 13982, + "teacher_loss": 0.3806071877479553 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.8564822673797607, + "learning_rate": 2.2203046986279038e-05, + "loss": 0.3427, + "step": 13983, + "teacher_loss": 0.28565141558647156 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.37768858671188354, + "learning_rate": 2.2201054660387212e-05, + "loss": 0.2751, + "step": 13984, + "teacher_loss": 0.2637355327606201 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.5590472221374512, + "learning_rate": 2.2199062169396796e-05, + "loss": 0.2392, + "step": 13985, + "teacher_loss": 0.20362427830696106 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.19404137134552002, + "learning_rate": 2.219706951335347e-05, + "loss": 0.1982, + "step": 13986, + "teacher_loss": 0.19869671761989594 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.2864931523799896, + "learning_rate": 2.219507669230291e-05, + "loss": 0.2871, + "step": 13987, + "teacher_loss": 0.287209689617157 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.3811852037906647, + "learning_rate": 2.219308370629082e-05, + "loss": 0.3106, + "step": 13988, + "teacher_loss": 0.30280041694641113 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.400515615940094, + "learning_rate": 2.2191090555362882e-05, + "loss": 0.2075, + "step": 13989, + "teacher_loss": 0.18609312176704407 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.2266104519367218, + "learning_rate": 2.2189097239564803e-05, + "loss": 0.2045, + "step": 13990, + "teacher_loss": 0.20202240347862244 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.24703636765480042, + "learning_rate": 2.2187103758942274e-05, + "loss": 0.2319, + "step": 13991, + "teacher_loss": 0.23016972839832306 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.5101150274276733, + "learning_rate": 2.2185110113541005e-05, + "loss": 0.2699, + "step": 13992, + "teacher_loss": 0.24318143725395203 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.3280387818813324, + "learning_rate": 2.2183116303406705e-05, + "loss": 0.28, + "step": 13993, + "teacher_loss": 0.2746083438396454 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.5447311401367188, + "learning_rate": 2.2181122328585077e-05, + "loss": 0.3099, + "step": 13994, + "teacher_loss": 0.2838304042816162 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.18166238069534302, + "learning_rate": 2.2179128189121853e-05, + "loss": 0.2451, + "step": 13995, + "teacher_loss": 0.2521204650402069 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.6368261575698853, + "learning_rate": 2.217713388506274e-05, + "loss": 0.2513, + "step": 13996, + "teacher_loss": 0.20843097567558289 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.5650144219398499, + "learning_rate": 2.2175139416453466e-05, + "loss": 0.2872, + "step": 13997, + "teacher_loss": 0.25634029507637024 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.4835836589336395, + "learning_rate": 2.2173144783339756e-05, + "loss": 0.2544, + "step": 13998, + "teacher_loss": 0.228973388671875 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.5226331353187561, + "learning_rate": 2.217114998576734e-05, + "loss": 0.2471, + "step": 13999, + "teacher_loss": 0.21647889912128448 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.43752187490463257, + "learning_rate": 2.2169155023781962e-05, + "loss": 0.2285, + "step": 14000, + "teacher_loss": 0.2052355259656906 + }, + { + "epoch": 2.53, + "eval_exact_match": 79.5837275307474, + "eval_f1": 87.03383772269521, + "step": 14000 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.5957362055778503, + "learning_rate": 2.2167159897429352e-05, + "loss": 0.3026, + "step": 14001, + "teacher_loss": 0.27007582783699036 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.8391679525375366, + "learning_rate": 2.2165164606755247e-05, + "loss": 0.5381, + "step": 14002, + "teacher_loss": 0.5046356916427612 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.33081912994384766, + "learning_rate": 2.216316915180541e-05, + "loss": 0.346, + "step": 14003, + "teacher_loss": 0.3477044105529785 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.3575651943683624, + "learning_rate": 2.2161173532625573e-05, + "loss": 0.1968, + "step": 14004, + "teacher_loss": 0.1789817214012146 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.910348117351532, + "learning_rate": 2.21591777492615e-05, + "loss": 0.5704, + "step": 14005, + "teacher_loss": 0.5325928926467896 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.48551708459854126, + "learning_rate": 2.2157181801758945e-05, + "loss": 0.3222, + "step": 14006, + "teacher_loss": 0.3040800094604492 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.5723710656166077, + "learning_rate": 2.215518569016367e-05, + "loss": 0.3423, + "step": 14007, + "teacher_loss": 0.31678885221481323 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.43259888887405396, + "learning_rate": 2.215318941452144e-05, + "loss": 0.2864, + "step": 14008, + "teacher_loss": 0.27018433809280396 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.28980234265327454, + "learning_rate": 2.215119297487802e-05, + "loss": 0.2184, + "step": 14009, + "teacher_loss": 0.21044719219207764 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.6581799983978271, + "learning_rate": 2.214919637127919e-05, + "loss": 0.3287, + "step": 14010, + "teacher_loss": 0.29211726784706116 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.35981911420822144, + "learning_rate": 2.2147199603770717e-05, + "loss": 0.3267, + "step": 14011, + "teacher_loss": 0.3230147659778595 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.5385850667953491, + "learning_rate": 2.2145202672398387e-05, + "loss": 0.3265, + "step": 14012, + "teacher_loss": 0.3028862476348877 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.29459571838378906, + "learning_rate": 2.2143205577207985e-05, + "loss": 0.1835, + "step": 14013, + "teacher_loss": 0.17117883265018463 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.34881746768951416, + "learning_rate": 2.2141208318245294e-05, + "loss": 0.2401, + "step": 14014, + "teacher_loss": 0.22796830534934998 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.5071390867233276, + "learning_rate": 2.2139210895556104e-05, + "loss": 0.3275, + "step": 14015, + "teacher_loss": 0.30757445096969604 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.6363747715950012, + "learning_rate": 2.2137213309186214e-05, + "loss": 0.3166, + "step": 14016, + "teacher_loss": 0.281048983335495 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.17972978949546814, + "learning_rate": 2.2135215559181423e-05, + "loss": 0.2294, + "step": 14017, + "teacher_loss": 0.23494967818260193 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.45542657375335693, + "learning_rate": 2.213321764558753e-05, + "loss": 0.2368, + "step": 14018, + "teacher_loss": 0.2124527394771576 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.3774620592594147, + "learning_rate": 2.2131219568450344e-05, + "loss": 0.248, + "step": 14019, + "teacher_loss": 0.2336605191230774 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.11900528520345688, + "learning_rate": 2.2129221327815675e-05, + "loss": 0.2273, + "step": 14020, + "teacher_loss": 0.23933851718902588 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.43570637702941895, + "learning_rate": 2.2127222923729332e-05, + "loss": 0.2738, + "step": 14021, + "teacher_loss": 0.255815327167511 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.5605031847953796, + "learning_rate": 2.2125224356237136e-05, + "loss": 0.256, + "step": 14022, + "teacher_loss": 0.22221572697162628 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.7855397462844849, + "learning_rate": 2.2123225625384913e-05, + "loss": 0.2777, + "step": 14023, + "teacher_loss": 0.22123906016349792 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.23428291082382202, + "learning_rate": 2.212122673121848e-05, + "loss": 0.1426, + "step": 14024, + "teacher_loss": 0.13236159086227417 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.1735738217830658, + "learning_rate": 2.2119227673783668e-05, + "loss": 0.2417, + "step": 14025, + "teacher_loss": 0.24928683042526245 + }, + { + "compression_loss": 0.0, + "epoch": 2.53, + "label_loss": 0.1873970329761505, + "learning_rate": 2.2117228453126308e-05, + "loss": 0.292, + "step": 14026, + "teacher_loss": 0.3035784363746643 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.627112627029419, + "learning_rate": 2.2115229069292246e-05, + "loss": 0.3128, + "step": 14027, + "teacher_loss": 0.2778727412223816 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.3382111191749573, + "learning_rate": 2.211322952232731e-05, + "loss": 0.2839, + "step": 14028, + "teacher_loss": 0.2779051661491394 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.410192608833313, + "learning_rate": 2.211122981227734e-05, + "loss": 0.2619, + "step": 14029, + "teacher_loss": 0.2454051971435547 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.30241554975509644, + "learning_rate": 2.2109229939188202e-05, + "loss": 0.2389, + "step": 14030, + "teacher_loss": 0.23184853792190552 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.3824981451034546, + "learning_rate": 2.210722990310573e-05, + "loss": 0.3287, + "step": 14031, + "teacher_loss": 0.32273900508880615 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.7765858173370361, + "learning_rate": 2.210522970407578e-05, + "loss": 0.3054, + "step": 14032, + "teacher_loss": 0.2530953586101532 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.3652813732624054, + "learning_rate": 2.2103229342144225e-05, + "loss": 0.2904, + "step": 14033, + "teacher_loss": 0.2821311950683594 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.5748870372772217, + "learning_rate": 2.2101228817356912e-05, + "loss": 0.2505, + "step": 14034, + "teacher_loss": 0.2144603133201599 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.6449252367019653, + "learning_rate": 2.2099228129759718e-05, + "loss": 0.2449, + "step": 14035, + "teacher_loss": 0.2004542201757431 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.3554195761680603, + "learning_rate": 2.2097227279398506e-05, + "loss": 0.223, + "step": 14036, + "teacher_loss": 0.20832259953022003 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.39301687479019165, + "learning_rate": 2.2095226266319145e-05, + "loss": 0.2595, + "step": 14037, + "teacher_loss": 0.24470297992229462 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.4760388135910034, + "learning_rate": 2.209322509056753e-05, + "loss": 0.2285, + "step": 14038, + "teacher_loss": 0.20096711814403534 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.5733082890510559, + "learning_rate": 2.2091223752189522e-05, + "loss": 0.2264, + "step": 14039, + "teacher_loss": 0.1878947913646698 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.15415360033512115, + "learning_rate": 2.2089222251231014e-05, + "loss": 0.1193, + "step": 14040, + "teacher_loss": 0.11538200080394745 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.2853025794029236, + "learning_rate": 2.2087220587737896e-05, + "loss": 0.2228, + "step": 14041, + "teacher_loss": 0.21588841080665588 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.4536241292953491, + "learning_rate": 2.2085218761756058e-05, + "loss": 0.3283, + "step": 14042, + "teacher_loss": 0.3143380880355835 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.6075706481933594, + "learning_rate": 2.2083216773331394e-05, + "loss": 0.2907, + "step": 14043, + "teacher_loss": 0.2555469870567322 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.3903805613517761, + "learning_rate": 2.208121462250981e-05, + "loss": 0.2476, + "step": 14044, + "teacher_loss": 0.23174268007278442 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.48584216833114624, + "learning_rate": 2.2079212309337205e-05, + "loss": 0.2621, + "step": 14045, + "teacher_loss": 0.23720073699951172 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.31547147035598755, + "learning_rate": 2.207720983385948e-05, + "loss": 0.1865, + "step": 14046, + "teacher_loss": 0.1721748411655426 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.523248553276062, + "learning_rate": 2.207520719612256e-05, + "loss": 0.3568, + "step": 14047, + "teacher_loss": 0.3383296728134155 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.4042520225048065, + "learning_rate": 2.207320439617235e-05, + "loss": 0.3066, + "step": 14048, + "teacher_loss": 0.2956960201263428 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.5169178247451782, + "learning_rate": 2.2071201434054772e-05, + "loss": 0.2454, + "step": 14049, + "teacher_loss": 0.21523939073085785 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.17877496778964996, + "learning_rate": 2.2069198309815738e-05, + "loss": 0.1766, + "step": 14050, + "teacher_loss": 0.17639514803886414 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.5083367824554443, + "learning_rate": 2.2067195023501187e-05, + "loss": 0.2518, + "step": 14051, + "teacher_loss": 0.22329753637313843 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.7437264919281006, + "learning_rate": 2.206519157515704e-05, + "loss": 0.3912, + "step": 14052, + "teacher_loss": 0.35206368565559387 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.31929442286491394, + "learning_rate": 2.2063187964829237e-05, + "loss": 0.2075, + "step": 14053, + "teacher_loss": 0.1950480192899704 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.3612954318523407, + "learning_rate": 2.2061184192563702e-05, + "loss": 0.2389, + "step": 14054, + "teacher_loss": 0.22525353729724884 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.3310979902744293, + "learning_rate": 2.2059180258406394e-05, + "loss": 0.2471, + "step": 14055, + "teacher_loss": 0.23775094747543335 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.246351420879364, + "learning_rate": 2.205717616240324e-05, + "loss": 0.2109, + "step": 14056, + "teacher_loss": 0.20690733194351196 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.3076486587524414, + "learning_rate": 2.2055171904600202e-05, + "loss": 0.186, + "step": 14057, + "teacher_loss": 0.17251771688461304 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.29886895418167114, + "learning_rate": 2.205316748504322e-05, + "loss": 0.3678, + "step": 14058, + "teacher_loss": 0.37546294927597046 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.21419644355773926, + "learning_rate": 2.2051162903778252e-05, + "loss": 0.2041, + "step": 14059, + "teacher_loss": 0.20295082032680511 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.9435220956802368, + "learning_rate": 2.2049158160851263e-05, + "loss": 0.3585, + "step": 14060, + "teacher_loss": 0.2934580147266388 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.4388246536254883, + "learning_rate": 2.2047153256308212e-05, + "loss": 0.2681, + "step": 14061, + "teacher_loss": 0.2491452395915985 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.808463454246521, + "learning_rate": 2.204514819019506e-05, + "loss": 0.2297, + "step": 14062, + "teacher_loss": 0.1653864085674286 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.26344990730285645, + "learning_rate": 2.204314296255779e-05, + "loss": 0.1913, + "step": 14063, + "teacher_loss": 0.18326006829738617 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.12900444865226746, + "learning_rate": 2.204113757344236e-05, + "loss": 0.1973, + "step": 14064, + "teacher_loss": 0.20484323799610138 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.5820817351341248, + "learning_rate": 2.2039132022894763e-05, + "loss": 0.2763, + "step": 14065, + "teacher_loss": 0.24229255318641663 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.2785002887248993, + "learning_rate": 2.203712631096097e-05, + "loss": 0.1749, + "step": 14066, + "teacher_loss": 0.16340306401252747 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.6004549264907837, + "learning_rate": 2.203512043768697e-05, + "loss": 0.286, + "step": 14067, + "teacher_loss": 0.25104573369026184 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.726807713508606, + "learning_rate": 2.2033114403118754e-05, + "loss": 0.4151, + "step": 14068, + "teacher_loss": 0.38051921129226685 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.8822823762893677, + "learning_rate": 2.2031108207302303e-05, + "loss": 0.4227, + "step": 14069, + "teacher_loss": 0.37167200446128845 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.30861830711364746, + "learning_rate": 2.2029101850283628e-05, + "loss": 0.208, + "step": 14070, + "teacher_loss": 0.19686290621757507 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.5329614281654358, + "learning_rate": 2.202709533210872e-05, + "loss": 0.4612, + "step": 14071, + "teacher_loss": 0.4532531201839447 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.347231924533844, + "learning_rate": 2.202508865282358e-05, + "loss": 0.1809, + "step": 14072, + "teacher_loss": 0.1624601185321808 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.681158185005188, + "learning_rate": 2.2023081812474224e-05, + "loss": 0.2956, + "step": 14073, + "teacher_loss": 0.2527763247489929 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.38196295499801636, + "learning_rate": 2.202107481110666e-05, + "loss": 0.2956, + "step": 14074, + "teacher_loss": 0.285977303981781 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.5220724940299988, + "learning_rate": 2.2019067648766895e-05, + "loss": 0.2158, + "step": 14075, + "teacher_loss": 0.18174313008785248 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.4009571373462677, + "learning_rate": 2.201706032550096e-05, + "loss": 0.2927, + "step": 14076, + "teacher_loss": 0.2806204855442047 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.20406538248062134, + "learning_rate": 2.2015052841354866e-05, + "loss": 0.1668, + "step": 14077, + "teacher_loss": 0.16261330246925354 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.507172703742981, + "learning_rate": 2.2013045196374645e-05, + "loss": 0.2537, + "step": 14078, + "teacher_loss": 0.22549480199813843 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.44694024324417114, + "learning_rate": 2.201103739060632e-05, + "loss": 0.2156, + "step": 14079, + "teacher_loss": 0.18989154696464539 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 0.7450734376907349, + "learning_rate": 2.200902942409593e-05, + "loss": 0.6223, + "step": 14080, + "teacher_loss": 0.608710527420044 + }, + { + "compression_loss": 0.0, + "epoch": 2.54, + "label_loss": 1.007805585861206, + "learning_rate": 2.200702129688951e-05, + "loss": 0.3065, + "step": 14081, + "teacher_loss": 0.22858496010303497 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.3384752571582794, + "learning_rate": 2.20050130090331e-05, + "loss": 0.2325, + "step": 14082, + "teacher_loss": 0.22076211869716644 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.5357757806777954, + "learning_rate": 2.200300456057274e-05, + "loss": 0.4807, + "step": 14083, + "teacher_loss": 0.47454819083213806 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.23482458293437958, + "learning_rate": 2.2000995951554487e-05, + "loss": 0.1895, + "step": 14084, + "teacher_loss": 0.18445265293121338 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.24944043159484863, + "learning_rate": 2.1998987182024384e-05, + "loss": 0.2395, + "step": 14085, + "teacher_loss": 0.23838230967521667 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.2411850094795227, + "learning_rate": 2.199697825202849e-05, + "loss": 0.3374, + "step": 14086, + "teacher_loss": 0.3481428325176239 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.23642152547836304, + "learning_rate": 2.199496916161286e-05, + "loss": 0.2187, + "step": 14087, + "teacher_loss": 0.21671488881111145 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.35776764154434204, + "learning_rate": 2.1992959910823563e-05, + "loss": 0.2284, + "step": 14088, + "teacher_loss": 0.21400777995586395 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.4272218346595764, + "learning_rate": 2.199095049970666e-05, + "loss": 0.229, + "step": 14089, + "teacher_loss": 0.20693504810333252 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.36526215076446533, + "learning_rate": 2.1988940928308223e-05, + "loss": 0.2763, + "step": 14090, + "teacher_loss": 0.26637548208236694 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.4052009582519531, + "learning_rate": 2.198693119667432e-05, + "loss": 0.2017, + "step": 14091, + "teacher_loss": 0.17905010282993317 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.38812676072120667, + "learning_rate": 2.1984921304851035e-05, + "loss": 0.275, + "step": 14092, + "teacher_loss": 0.26238229870796204 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.9409998655319214, + "learning_rate": 2.198291125288445e-05, + "loss": 0.3418, + "step": 14093, + "teacher_loss": 0.2752014398574829 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.2680700421333313, + "learning_rate": 2.198090104082064e-05, + "loss": 0.1783, + "step": 14094, + "teacher_loss": 0.1683673858642578 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.4754878282546997, + "learning_rate": 2.19788906687057e-05, + "loss": 0.3228, + "step": 14095, + "teacher_loss": 0.30585891008377075 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.4327659010887146, + "learning_rate": 2.1976880136585725e-05, + "loss": 0.415, + "step": 14096, + "teacher_loss": 0.41297274827957153 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.44516468048095703, + "learning_rate": 2.19748694445068e-05, + "loss": 0.223, + "step": 14097, + "teacher_loss": 0.19832664728164673 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.31076425313949585, + "learning_rate": 2.1972858592515037e-05, + "loss": 0.2559, + "step": 14098, + "teacher_loss": 0.2497912347316742 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.6550500392913818, + "learning_rate": 2.197084758065653e-05, + "loss": 0.5141, + "step": 14099, + "teacher_loss": 0.49847814440727234 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.5302172899246216, + "learning_rate": 2.1968836408977384e-05, + "loss": 0.2613, + "step": 14100, + "teacher_loss": 0.2314019650220871 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.5367562770843506, + "learning_rate": 2.1966825077523718e-05, + "loss": 0.3119, + "step": 14101, + "teacher_loss": 0.28687891364097595 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.34987181425094604, + "learning_rate": 2.1964813586341636e-05, + "loss": 0.3052, + "step": 14102, + "teacher_loss": 0.30019691586494446 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.305778443813324, + "learning_rate": 2.1962801935477263e-05, + "loss": 0.1896, + "step": 14103, + "teacher_loss": 0.17672264575958252 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.6921484470367432, + "learning_rate": 2.196079012497672e-05, + "loss": 0.2512, + "step": 14104, + "teacher_loss": 0.20222973823547363 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.28185778856277466, + "learning_rate": 2.195877815488612e-05, + "loss": 0.2756, + "step": 14105, + "teacher_loss": 0.2749515771865845 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.8075480461120605, + "learning_rate": 2.1956766025251607e-05, + "loss": 0.3559, + "step": 14106, + "teacher_loss": 0.30566275119781494 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.30305686593055725, + "learning_rate": 2.1954753736119306e-05, + "loss": 0.2659, + "step": 14107, + "teacher_loss": 0.2617884576320648 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.7195541858673096, + "learning_rate": 2.1952741287535356e-05, + "loss": 0.2958, + "step": 14108, + "teacher_loss": 0.24873146414756775 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.5910072922706604, + "learning_rate": 2.1950728679545887e-05, + "loss": 0.2327, + "step": 14109, + "teacher_loss": 0.19285470247268677 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.4437260627746582, + "learning_rate": 2.1948715912197052e-05, + "loss": 0.2729, + "step": 14110, + "teacher_loss": 0.2539476752281189 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.3492084741592407, + "learning_rate": 2.1946702985534994e-05, + "loss": 0.2295, + "step": 14111, + "teacher_loss": 0.21623246371746063 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.12968352437019348, + "learning_rate": 2.1944689899605867e-05, + "loss": 0.2091, + "step": 14112, + "teacher_loss": 0.21796298027038574 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.5501447916030884, + "learning_rate": 2.194267665445582e-05, + "loss": 0.2078, + "step": 14113, + "teacher_loss": 0.16978394985198975 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.3403867483139038, + "learning_rate": 2.1940663250131008e-05, + "loss": 0.1744, + "step": 14114, + "teacher_loss": 0.15599872171878815 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.5268205404281616, + "learning_rate": 2.19386496866776e-05, + "loss": 0.4073, + "step": 14115, + "teacher_loss": 0.39399218559265137 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.22666972875595093, + "learning_rate": 2.1936635964141763e-05, + "loss": 0.2997, + "step": 14116, + "teacher_loss": 0.30776381492614746 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.3363306224346161, + "learning_rate": 2.1934622082569655e-05, + "loss": 0.2523, + "step": 14117, + "teacher_loss": 0.24298641085624695 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.5341652035713196, + "learning_rate": 2.1932608042007453e-05, + "loss": 0.3433, + "step": 14118, + "teacher_loss": 0.32211798429489136 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 1.2450436353683472, + "learning_rate": 2.1930593842501335e-05, + "loss": 0.3397, + "step": 14119, + "teacher_loss": 0.23913028836250305 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.41809555888175964, + "learning_rate": 2.192857948409748e-05, + "loss": 0.2989, + "step": 14120, + "teacher_loss": 0.28570622205734253 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.2445700615644455, + "learning_rate": 2.192656496684207e-05, + "loss": 0.2921, + "step": 14121, + "teacher_loss": 0.2974112629890442 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.45156484842300415, + "learning_rate": 2.192455029078129e-05, + "loss": 0.3054, + "step": 14122, + "teacher_loss": 0.28919684886932373 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.4840802252292633, + "learning_rate": 2.1922535455961333e-05, + "loss": 0.2587, + "step": 14123, + "teacher_loss": 0.23364229500293732 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.34423699975013733, + "learning_rate": 2.19205204624284e-05, + "loss": 0.2658, + "step": 14124, + "teacher_loss": 0.25707298517227173 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.29873815178871155, + "learning_rate": 2.191850531022867e-05, + "loss": 0.228, + "step": 14125, + "teacher_loss": 0.22011500597000122 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.24945895373821259, + "learning_rate": 2.1916489999408366e-05, + "loss": 0.144, + "step": 14126, + "teacher_loss": 0.13223929703235626 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.3605618476867676, + "learning_rate": 2.191447453001368e-05, + "loss": 0.2629, + "step": 14127, + "teacher_loss": 0.25203514099121094 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.34237921237945557, + "learning_rate": 2.1912458902090823e-05, + "loss": 0.1913, + "step": 14128, + "teacher_loss": 0.17449024319648743 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.7491214275360107, + "learning_rate": 2.191044311568601e-05, + "loss": 0.2229, + "step": 14129, + "teacher_loss": 0.16445466876029968 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.5319511890411377, + "learning_rate": 2.190842717084545e-05, + "loss": 0.305, + "step": 14130, + "teacher_loss": 0.27978986501693726 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.40109291672706604, + "learning_rate": 2.190641106761537e-05, + "loss": 0.1922, + "step": 14131, + "teacher_loss": 0.16897651553153992 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.362934410572052, + "learning_rate": 2.190439480604199e-05, + "loss": 0.2259, + "step": 14132, + "teacher_loss": 0.21069568395614624 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.20084592700004578, + "learning_rate": 2.1902378386171542e-05, + "loss": 0.2229, + "step": 14133, + "teacher_loss": 0.2253381311893463 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.42890846729278564, + "learning_rate": 2.1900361808050246e-05, + "loss": 0.2456, + "step": 14134, + "teacher_loss": 0.22517837584018707 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.8364607095718384, + "learning_rate": 2.1898345071724344e-05, + "loss": 0.3253, + "step": 14135, + "teacher_loss": 0.2684541344642639 + }, + { + "compression_loss": 0.0, + "epoch": 2.55, + "label_loss": 0.32684433460235596, + "learning_rate": 2.1896328177240074e-05, + "loss": 0.2289, + "step": 14136, + "teacher_loss": 0.21796786785125732 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.2907610535621643, + "learning_rate": 2.1894311124643675e-05, + "loss": 0.2473, + "step": 14137, + "teacher_loss": 0.2424989938735962 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.2031404823064804, + "learning_rate": 2.189229391398139e-05, + "loss": 0.1887, + "step": 14138, + "teacher_loss": 0.18708856403827667 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.31731581687927246, + "learning_rate": 2.189027654529947e-05, + "loss": 0.2541, + "step": 14139, + "teacher_loss": 0.24708092212677002 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.4953751564025879, + "learning_rate": 2.1888259018644167e-05, + "loss": 0.3317, + "step": 14140, + "teacher_loss": 0.3135678768157959 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.0603305846452713, + "learning_rate": 2.1886241334061737e-05, + "loss": 0.1308, + "step": 14141, + "teacher_loss": 0.13861612975597382 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.6661931276321411, + "learning_rate": 2.1884223491598443e-05, + "loss": 0.3738, + "step": 14142, + "teacher_loss": 0.34127065539360046 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.45140206813812256, + "learning_rate": 2.1882205491300542e-05, + "loss": 0.3614, + "step": 14143, + "teacher_loss": 0.35140180587768555 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.31395697593688965, + "learning_rate": 2.1880187333214297e-05, + "loss": 0.2926, + "step": 14144, + "teacher_loss": 0.2902813255786896 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.5264514684677124, + "learning_rate": 2.187816901738599e-05, + "loss": 0.265, + "step": 14145, + "teacher_loss": 0.23598219454288483 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.2271890640258789, + "learning_rate": 2.187615054386189e-05, + "loss": 0.2937, + "step": 14146, + "teacher_loss": 0.3011230528354645 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.44491517543792725, + "learning_rate": 2.187413191268827e-05, + "loss": 0.2348, + "step": 14147, + "teacher_loss": 0.21148845553398132 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.22685542702674866, + "learning_rate": 2.187211312391142e-05, + "loss": 0.2119, + "step": 14148, + "teacher_loss": 0.2102307677268982 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.31112754344940186, + "learning_rate": 2.1870094177577614e-05, + "loss": 0.1886, + "step": 14149, + "teacher_loss": 0.17495453357696533 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.1820644736289978, + "learning_rate": 2.1868075073733147e-05, + "loss": 0.217, + "step": 14150, + "teacher_loss": 0.22091570496559143 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.777638852596283, + "learning_rate": 2.1866055812424317e-05, + "loss": 0.3151, + "step": 14151, + "teacher_loss": 0.2636851370334625 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.3828042447566986, + "learning_rate": 2.1864036393697405e-05, + "loss": 0.2685, + "step": 14152, + "teacher_loss": 0.2558153569698334 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.49958133697509766, + "learning_rate": 2.186201681759872e-05, + "loss": 0.1867, + "step": 14153, + "teacher_loss": 0.15198293328285217 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.2810642719268799, + "learning_rate": 2.1859997084174562e-05, + "loss": 0.2653, + "step": 14154, + "teacher_loss": 0.2635181248188019 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.1996690034866333, + "learning_rate": 2.185797719347124e-05, + "loss": 0.2062, + "step": 14155, + "teacher_loss": 0.2069195955991745 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.34417709708213806, + "learning_rate": 2.1855957145535068e-05, + "loss": 0.1465, + "step": 14156, + "teacher_loss": 0.124505415558815 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.4435366690158844, + "learning_rate": 2.1853936940412342e-05, + "loss": 0.2345, + "step": 14157, + "teacher_loss": 0.21125584840774536 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.5421137809753418, + "learning_rate": 2.18519165781494e-05, + "loss": 0.3176, + "step": 14158, + "teacher_loss": 0.29266357421875 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.5976833701133728, + "learning_rate": 2.184989605879255e-05, + "loss": 0.4075, + "step": 14159, + "teacher_loss": 0.38640332221984863 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.5281933546066284, + "learning_rate": 2.184787538238812e-05, + "loss": 0.2753, + "step": 14160, + "teacher_loss": 0.24717864394187927 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.48728421330451965, + "learning_rate": 2.184585454898244e-05, + "loss": 0.5523, + "step": 14161, + "teacher_loss": 0.5595563650131226 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.4356938898563385, + "learning_rate": 2.184383355862184e-05, + "loss": 0.2105, + "step": 14162, + "teacher_loss": 0.18547439575195312 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.4942814111709595, + "learning_rate": 2.1841812411352658e-05, + "loss": 0.2739, + "step": 14163, + "teacher_loss": 0.24946808815002441 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.2307223081588745, + "learning_rate": 2.1839791107221228e-05, + "loss": 0.1663, + "step": 14164, + "teacher_loss": 0.15912988781929016 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.5112999677658081, + "learning_rate": 2.1837769646273892e-05, + "loss": 0.2639, + "step": 14165, + "teacher_loss": 0.23640765249729156 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.8613361120223999, + "learning_rate": 2.1835748028557004e-05, + "loss": 0.5079, + "step": 14166, + "teacher_loss": 0.4686659276485443 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.5411689877510071, + "learning_rate": 2.1833726254116903e-05, + "loss": 0.2326, + "step": 14167, + "teacher_loss": 0.1983032524585724 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.5302461385726929, + "learning_rate": 2.183170432299995e-05, + "loss": 0.3378, + "step": 14168, + "teacher_loss": 0.3163986802101135 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.44137582182884216, + "learning_rate": 2.18296822352525e-05, + "loss": 0.2827, + "step": 14169, + "teacher_loss": 0.2650667428970337 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.5907478332519531, + "learning_rate": 2.182765999092091e-05, + "loss": 0.2359, + "step": 14170, + "teacher_loss": 0.19652211666107178 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.27910155057907104, + "learning_rate": 2.182563759005155e-05, + "loss": 0.2895, + "step": 14171, + "teacher_loss": 0.29067856073379517 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.4879056513309479, + "learning_rate": 2.1823615032690786e-05, + "loss": 0.3507, + "step": 14172, + "teacher_loss": 0.33540865778923035 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.7178019285202026, + "learning_rate": 2.1821592318884983e-05, + "loss": 0.223, + "step": 14173, + "teacher_loss": 0.16799131035804749 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.39833736419677734, + "learning_rate": 2.181956944868052e-05, + "loss": 0.2742, + "step": 14174, + "teacher_loss": 0.26039671897888184 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.3856354057788849, + "learning_rate": 2.1817546422123777e-05, + "loss": 0.2339, + "step": 14175, + "teacher_loss": 0.21701905131340027 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.5523322224617004, + "learning_rate": 2.1815523239261137e-05, + "loss": 0.2863, + "step": 14176, + "teacher_loss": 0.25678563117980957 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.38658204674720764, + "learning_rate": 2.181349990013898e-05, + "loss": 0.2747, + "step": 14177, + "teacher_loss": 0.26232317090034485 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.40355247259140015, + "learning_rate": 2.18114764048037e-05, + "loss": 0.271, + "step": 14178, + "teacher_loss": 0.2563096880912781 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.411508172750473, + "learning_rate": 2.180945275330169e-05, + "loss": 0.2851, + "step": 14179, + "teacher_loss": 0.2710726857185364 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.48566436767578125, + "learning_rate": 2.180742894567934e-05, + "loss": 0.3208, + "step": 14180, + "teacher_loss": 0.3025311231613159 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.20909595489501953, + "learning_rate": 2.180540498198306e-05, + "loss": 0.2366, + "step": 14181, + "teacher_loss": 0.23965993523597717 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.4929698705673218, + "learning_rate": 2.180338086225924e-05, + "loss": 0.2245, + "step": 14182, + "teacher_loss": 0.1946302354335785 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 1.0289430618286133, + "learning_rate": 2.1801356586554298e-05, + "loss": 0.3652, + "step": 14183, + "teacher_loss": 0.29142096638679504 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.5295517444610596, + "learning_rate": 2.179933215491464e-05, + "loss": 0.5305, + "step": 14184, + "teacher_loss": 0.5305664539337158 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.3709298372268677, + "learning_rate": 2.179730756738668e-05, + "loss": 0.2261, + "step": 14185, + "teacher_loss": 0.2100437581539154 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.5170134902000427, + "learning_rate": 2.1795282824016845e-05, + "loss": 0.2354, + "step": 14186, + "teacher_loss": 0.20409107208251953 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.4345567226409912, + "learning_rate": 2.1793257924851534e-05, + "loss": 0.3006, + "step": 14187, + "teacher_loss": 0.2856846749782562 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.2764303982257843, + "learning_rate": 2.17912328699372e-05, + "loss": 0.1969, + "step": 14188, + "teacher_loss": 0.18808524310588837 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.8379099369049072, + "learning_rate": 2.178920765932025e-05, + "loss": 0.4773, + "step": 14189, + "teacher_loss": 0.43725278973579407 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.3498167395591736, + "learning_rate": 2.1787182293047123e-05, + "loss": 0.2576, + "step": 14190, + "teacher_loss": 0.2473965585231781 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.2589060664176941, + "learning_rate": 2.1785156771164256e-05, + "loss": 0.2593, + "step": 14191, + "teacher_loss": 0.2593681216239929 + }, + { + "compression_loss": 0.0, + "epoch": 2.56, + "label_loss": 0.7577589154243469, + "learning_rate": 2.1783131093718086e-05, + "loss": 0.3266, + "step": 14192, + "teacher_loss": 0.27871954441070557 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.4304571747779846, + "learning_rate": 2.178110526075506e-05, + "loss": 0.3244, + "step": 14193, + "teacher_loss": 0.31262266635894775 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.5653932094573975, + "learning_rate": 2.1779079272321623e-05, + "loss": 0.2497, + "step": 14194, + "teacher_loss": 0.21467682719230652 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.7908973693847656, + "learning_rate": 2.177705312846422e-05, + "loss": 0.2804, + "step": 14195, + "teacher_loss": 0.2236693799495697 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.45874154567718506, + "learning_rate": 2.1775026829229306e-05, + "loss": 0.2316, + "step": 14196, + "teacher_loss": 0.2064008265733719 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.5312547087669373, + "learning_rate": 2.177300037466334e-05, + "loss": 0.2646, + "step": 14197, + "teacher_loss": 0.23500633239746094 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.19308380782604218, + "learning_rate": 2.1770973764812785e-05, + "loss": 0.2159, + "step": 14198, + "teacher_loss": 0.21842306852340698 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.22375868260860443, + "learning_rate": 2.17689469997241e-05, + "loss": 0.279, + "step": 14199, + "teacher_loss": 0.28512609004974365 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.6033531427383423, + "learning_rate": 2.1766920079443753e-05, + "loss": 0.277, + "step": 14200, + "teacher_loss": 0.24078398942947388 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.5749766826629639, + "learning_rate": 2.176489300401822e-05, + "loss": 0.3308, + "step": 14201, + "teacher_loss": 0.3036767244338989 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.3687785267829895, + "learning_rate": 2.176286577349397e-05, + "loss": 0.2244, + "step": 14202, + "teacher_loss": 0.2083839774131775 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.5712060928344727, + "learning_rate": 2.1760838387917485e-05, + "loss": 0.3084, + "step": 14203, + "teacher_loss": 0.27922940254211426 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.8496137857437134, + "learning_rate": 2.1758810847335243e-05, + "loss": 0.2912, + "step": 14204, + "teacher_loss": 0.22920575737953186 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.0672571212053299, + "learning_rate": 2.1756783151793737e-05, + "loss": 0.1311, + "step": 14205, + "teacher_loss": 0.1381981074810028 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.47197240591049194, + "learning_rate": 2.1754755301339453e-05, + "loss": 0.2643, + "step": 14206, + "teacher_loss": 0.2412358522415161 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.5092145204544067, + "learning_rate": 2.1752727296018872e-05, + "loss": 0.3291, + "step": 14207, + "teacher_loss": 0.30904000997543335 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.4917996823787689, + "learning_rate": 2.1750699135878507e-05, + "loss": 0.2319, + "step": 14208, + "teacher_loss": 0.2030460238456726 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.3462759554386139, + "learning_rate": 2.174867082096485e-05, + "loss": 0.2419, + "step": 14209, + "teacher_loss": 0.23030412197113037 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.49086758494377136, + "learning_rate": 2.1746642351324402e-05, + "loss": 0.3238, + "step": 14210, + "teacher_loss": 0.3052000403404236 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.3395147919654846, + "learning_rate": 2.1744613727003677e-05, + "loss": 0.2274, + "step": 14211, + "teacher_loss": 0.21495041251182556 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.36897778511047363, + "learning_rate": 2.1742584948049175e-05, + "loss": 0.2934, + "step": 14212, + "teacher_loss": 0.2849540710449219 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.3142859637737274, + "learning_rate": 2.174055601450742e-05, + "loss": 0.2039, + "step": 14213, + "teacher_loss": 0.19160521030426025 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.5150015354156494, + "learning_rate": 2.1738526926424925e-05, + "loss": 0.2692, + "step": 14214, + "teacher_loss": 0.241885244846344 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.2842918634414673, + "learning_rate": 2.1736497683848204e-05, + "loss": 0.2968, + "step": 14215, + "teacher_loss": 0.29816338419914246 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.6323486566543579, + "learning_rate": 2.17344682868238e-05, + "loss": 0.3458, + "step": 14216, + "teacher_loss": 0.3139745593070984 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.5285513401031494, + "learning_rate": 2.173243873539822e-05, + "loss": 0.2804, + "step": 14217, + "teacher_loss": 0.2528434693813324 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.448128879070282, + "learning_rate": 2.1730409029618004e-05, + "loss": 0.2399, + "step": 14218, + "teacher_loss": 0.2167319655418396 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.5915016531944275, + "learning_rate": 2.1728379169529693e-05, + "loss": 0.2414, + "step": 14219, + "teacher_loss": 0.20250959694385529 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.6359156370162964, + "learning_rate": 2.1726349155179817e-05, + "loss": 0.4224, + "step": 14220, + "teacher_loss": 0.39868664741516113 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.14802348613739014, + "learning_rate": 2.172431898661492e-05, + "loss": 0.1713, + "step": 14221, + "teacher_loss": 0.17383426427841187 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.5322903394699097, + "learning_rate": 2.1722288663881555e-05, + "loss": 0.5258, + "step": 14222, + "teacher_loss": 0.525030255317688 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.3155434727668762, + "learning_rate": 2.1720258187026258e-05, + "loss": 0.2677, + "step": 14223, + "teacher_loss": 0.2623414993286133 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.5718631744384766, + "learning_rate": 2.17182275560956e-05, + "loss": 0.3774, + "step": 14224, + "teacher_loss": 0.3557378053665161 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.22144627571105957, + "learning_rate": 2.1716196771136115e-05, + "loss": 0.2091, + "step": 14225, + "teacher_loss": 0.20774388313293457 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.37791627645492554, + "learning_rate": 2.171416583219438e-05, + "loss": 0.2642, + "step": 14226, + "teacher_loss": 0.25157007575035095 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.20667049288749695, + "learning_rate": 2.1712134739316955e-05, + "loss": 0.1768, + "step": 14227, + "teacher_loss": 0.17345619201660156 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.33477669954299927, + "learning_rate": 2.1710103492550396e-05, + "loss": 0.2359, + "step": 14228, + "teacher_loss": 0.22491490840911865 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.928906261920929, + "learning_rate": 2.170807209194129e-05, + "loss": 0.3721, + "step": 14229, + "teacher_loss": 0.31026798486709595 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.9561784267425537, + "learning_rate": 2.17060405375362e-05, + "loss": 0.3168, + "step": 14230, + "teacher_loss": 0.24571369588375092 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.770319402217865, + "learning_rate": 2.170400882938171e-05, + "loss": 0.5305, + "step": 14231, + "teacher_loss": 0.5038201808929443 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.49957719445228577, + "learning_rate": 2.1701976967524388e-05, + "loss": 0.2385, + "step": 14232, + "teacher_loss": 0.2095131278038025 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.2698243260383606, + "learning_rate": 2.1699944952010832e-05, + "loss": 0.2198, + "step": 14233, + "teacher_loss": 0.21420526504516602 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.6613113880157471, + "learning_rate": 2.1697912782887627e-05, + "loss": 0.2718, + "step": 14234, + "teacher_loss": 0.2284790277481079 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.34646034240722656, + "learning_rate": 2.1695880460201364e-05, + "loss": 0.2587, + "step": 14235, + "teacher_loss": 0.24891141057014465 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.4663724899291992, + "learning_rate": 2.1693847983998636e-05, + "loss": 0.2451, + "step": 14236, + "teacher_loss": 0.22052177786827087 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.32446038722991943, + "learning_rate": 2.1691815354326037e-05, + "loss": 0.2313, + "step": 14237, + "teacher_loss": 0.22098299860954285 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.6764269471168518, + "learning_rate": 2.1689782571230184e-05, + "loss": 0.3134, + "step": 14238, + "teacher_loss": 0.27308785915374756 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.8967772722244263, + "learning_rate": 2.168774963475767e-05, + "loss": 0.3302, + "step": 14239, + "teacher_loss": 0.2672373354434967 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.830818772315979, + "learning_rate": 2.1685716544955108e-05, + "loss": 0.3703, + "step": 14240, + "teacher_loss": 0.3191695809364319 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.38655751943588257, + "learning_rate": 2.1683683301869116e-05, + "loss": 0.26, + "step": 14241, + "teacher_loss": 0.24599100649356842 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.7583180665969849, + "learning_rate": 2.1681649905546298e-05, + "loss": 0.3078, + "step": 14242, + "teacher_loss": 0.25770893692970276 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.8816455602645874, + "learning_rate": 2.167961635603328e-05, + "loss": 0.9039, + "step": 14243, + "teacher_loss": 0.9063464403152466 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.8013314604759216, + "learning_rate": 2.167758265337669e-05, + "loss": 0.3109, + "step": 14244, + "teacher_loss": 0.2563667297363281 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.43961644172668457, + "learning_rate": 2.1675548797623144e-05, + "loss": 0.2405, + "step": 14245, + "teacher_loss": 0.2183992564678192 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.5126307010650635, + "learning_rate": 2.1673514788819283e-05, + "loss": 0.2553, + "step": 14246, + "teacher_loss": 0.22670122981071472 + }, + { + "compression_loss": 0.0, + "epoch": 2.57, + "label_loss": 0.9087996482849121, + "learning_rate": 2.1671480627011734e-05, + "loss": 0.2797, + "step": 14247, + "teacher_loss": 0.20979446172714233 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.7375493049621582, + "learning_rate": 2.1669446312247138e-05, + "loss": 0.2837, + "step": 14248, + "teacher_loss": 0.23330962657928467 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.17408889532089233, + "learning_rate": 2.166741184457214e-05, + "loss": 0.1773, + "step": 14249, + "teacher_loss": 0.17760753631591797 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.5018510222434998, + "learning_rate": 2.1665377224033365e-05, + "loss": 0.2026, + "step": 14250, + "teacher_loss": 0.16940000653266907 + }, + { + "epoch": 2.58, + "eval_exact_match": 79.81078524124882, + "eval_f1": 87.07273064226436, + "step": 14250 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.44981861114501953, + "learning_rate": 2.166334245067748e-05, + "loss": 0.2027, + "step": 14251, + "teacher_loss": 0.17528456449508667 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.2728629410266876, + "learning_rate": 2.1661307524551134e-05, + "loss": 0.238, + "step": 14252, + "teacher_loss": 0.23409652709960938 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.8141746520996094, + "learning_rate": 2.1659272445700972e-05, + "loss": 0.3649, + "step": 14253, + "teacher_loss": 0.3149639666080475 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.5118100047111511, + "learning_rate": 2.1657237214173664e-05, + "loss": 0.3378, + "step": 14254, + "teacher_loss": 0.318419873714447 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.500604510307312, + "learning_rate": 2.1655201830015864e-05, + "loss": 0.2957, + "step": 14255, + "teacher_loss": 0.2728869915008545 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.4157668948173523, + "learning_rate": 2.165316629327424e-05, + "loss": 0.2265, + "step": 14256, + "teacher_loss": 0.20551112294197083 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.5615580081939697, + "learning_rate": 2.165113060399546e-05, + "loss": 0.2507, + "step": 14257, + "teacher_loss": 0.21621017158031464 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.7114548683166504, + "learning_rate": 2.1649094762226195e-05, + "loss": 0.2624, + "step": 14258, + "teacher_loss": 0.21254561841487885 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.6866539716720581, + "learning_rate": 2.1647058768013125e-05, + "loss": 0.2453, + "step": 14259, + "teacher_loss": 0.1963009536266327 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.47950679063796997, + "learning_rate": 2.1645022621402923e-05, + "loss": 0.2007, + "step": 14260, + "teacher_loss": 0.1697450578212738 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.5898662209510803, + "learning_rate": 2.1642986322442276e-05, + "loss": 0.2704, + "step": 14261, + "teacher_loss": 0.23486298322677612 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.7955036759376526, + "learning_rate": 2.1640949871177868e-05, + "loss": 0.5313, + "step": 14262, + "teacher_loss": 0.5019314885139465 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.9793264865875244, + "learning_rate": 2.1638913267656393e-05, + "loss": 0.8924, + "step": 14263, + "teacher_loss": 0.8827388286590576 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.8474443554878235, + "learning_rate": 2.1636876511924542e-05, + "loss": 0.4419, + "step": 14264, + "teacher_loss": 0.3968254625797272 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.397510826587677, + "learning_rate": 2.1634839604029005e-05, + "loss": 0.323, + "step": 14265, + "teacher_loss": 0.3147730827331543 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.4114142656326294, + "learning_rate": 2.1632802544016495e-05, + "loss": 0.2586, + "step": 14266, + "teacher_loss": 0.24160446226596832 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.16140009462833405, + "learning_rate": 2.1630765331933703e-05, + "loss": 0.1986, + "step": 14267, + "teacher_loss": 0.20274126529693604 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.7105826139450073, + "learning_rate": 2.1628727967827346e-05, + "loss": 0.3881, + "step": 14268, + "teacher_loss": 0.35231661796569824 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.40489107370376587, + "learning_rate": 2.1626690451744128e-05, + "loss": 0.2028, + "step": 14269, + "teacher_loss": 0.1803997904062271 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.5906544327735901, + "learning_rate": 2.162465278373077e-05, + "loss": 0.3311, + "step": 14270, + "teacher_loss": 0.3022775948047638 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.22036081552505493, + "learning_rate": 2.162261496383398e-05, + "loss": 0.1825, + "step": 14271, + "teacher_loss": 0.17833009362220764 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.45569610595703125, + "learning_rate": 2.162057699210049e-05, + "loss": 0.2424, + "step": 14272, + "teacher_loss": 0.21870452165603638 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.18406696617603302, + "learning_rate": 2.161853886857701e-05, + "loss": 0.2265, + "step": 14273, + "teacher_loss": 0.23124441504478455 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.6203475594520569, + "learning_rate": 2.1616500593310288e-05, + "loss": 0.232, + "step": 14274, + "teacher_loss": 0.1888052374124527 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.2765890061855316, + "learning_rate": 2.1614462166347043e-05, + "loss": 0.2329, + "step": 14275, + "teacher_loss": 0.22800664603710175 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.38806673884391785, + "learning_rate": 2.1612423587734007e-05, + "loss": 0.2324, + "step": 14276, + "teacher_loss": 0.21505972743034363 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.7791194915771484, + "learning_rate": 2.1610384857517926e-05, + "loss": 0.2402, + "step": 14277, + "teacher_loss": 0.18035873770713806 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.36976537108421326, + "learning_rate": 2.1608345975745534e-05, + "loss": 0.241, + "step": 14278, + "teacher_loss": 0.2266956865787506 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.6180068254470825, + "learning_rate": 2.1606306942463592e-05, + "loss": 0.3944, + "step": 14279, + "teacher_loss": 0.3695370852947235 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.25726011395454407, + "learning_rate": 2.160426775771883e-05, + "loss": 0.1602, + "step": 14280, + "teacher_loss": 0.14943283796310425 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 1.0388661623001099, + "learning_rate": 2.1602228421558013e-05, + "loss": 0.3267, + "step": 14281, + "teacher_loss": 0.24756869673728943 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.35848677158355713, + "learning_rate": 2.160018893402789e-05, + "loss": 0.2496, + "step": 14282, + "teacher_loss": 0.2375195324420929 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.5121340751647949, + "learning_rate": 2.1598149295175224e-05, + "loss": 0.231, + "step": 14283, + "teacher_loss": 0.19981051981449127 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 1.4362032413482666, + "learning_rate": 2.1596109505046776e-05, + "loss": 0.4103, + "step": 14284, + "teacher_loss": 0.2963097095489502 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.32695263624191284, + "learning_rate": 2.1594069563689316e-05, + "loss": 0.1851, + "step": 14285, + "teacher_loss": 0.16935238242149353 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.42051804065704346, + "learning_rate": 2.1592029471149607e-05, + "loss": 0.2834, + "step": 14286, + "teacher_loss": 0.2681804895401001 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.3083737790584564, + "learning_rate": 2.1589989227474433e-05, + "loss": 0.3206, + "step": 14287, + "teacher_loss": 0.32191166281700134 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.4337674379348755, + "learning_rate": 2.1587948832710557e-05, + "loss": 0.2561, + "step": 14288, + "teacher_loss": 0.23639589548110962 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.24450816214084625, + "learning_rate": 2.158590828690477e-05, + "loss": 0.2515, + "step": 14289, + "teacher_loss": 0.25224125385284424 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 1.0289298295974731, + "learning_rate": 2.1583867590103848e-05, + "loss": 0.4848, + "step": 14290, + "teacher_loss": 0.42434966564178467 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.3674364686012268, + "learning_rate": 2.1581826742354585e-05, + "loss": 0.4311, + "step": 14291, + "teacher_loss": 0.4382132589817047 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.2838875353336334, + "learning_rate": 2.1579785743703763e-05, + "loss": 0.2158, + "step": 14292, + "teacher_loss": 0.2082471251487732 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 1.239539623260498, + "learning_rate": 2.157774459419819e-05, + "loss": 0.3537, + "step": 14293, + "teacher_loss": 0.25524815917015076 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.5885140299797058, + "learning_rate": 2.1575703293884646e-05, + "loss": 0.2394, + "step": 14294, + "teacher_loss": 0.20059171319007874 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.38943448662757874, + "learning_rate": 2.1573661842809942e-05, + "loss": 0.2814, + "step": 14295, + "teacher_loss": 0.2694370746612549 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.7366315126419067, + "learning_rate": 2.1571620241020884e-05, + "loss": 0.4661, + "step": 14296, + "teacher_loss": 0.4360237121582031 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.678027868270874, + "learning_rate": 2.1569578488564275e-05, + "loss": 0.3693, + "step": 14297, + "teacher_loss": 0.3349994719028473 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.293626606464386, + "learning_rate": 2.156753658548693e-05, + "loss": 0.231, + "step": 14298, + "teacher_loss": 0.22402653098106384 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.5079373717308044, + "learning_rate": 2.156549453183566e-05, + "loss": 0.3017, + "step": 14299, + "teacher_loss": 0.27876919507980347 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.3499205410480499, + "learning_rate": 2.156345232765728e-05, + "loss": 0.2254, + "step": 14300, + "teacher_loss": 0.21151064336299896 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.6451599597930908, + "learning_rate": 2.1561409972998623e-05, + "loss": 0.2648, + "step": 14301, + "teacher_loss": 0.2225797474384308 + }, + { + "compression_loss": 0.0, + "epoch": 2.58, + "label_loss": 0.6538830995559692, + "learning_rate": 2.1559367467906508e-05, + "loss": 0.3495, + "step": 14302, + "teacher_loss": 0.315729558467865 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.11456164717674255, + "learning_rate": 2.1557324812427755e-05, + "loss": 0.1702, + "step": 14303, + "teacher_loss": 0.17638170719146729 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.4473934769630432, + "learning_rate": 2.1555282006609216e-05, + "loss": 0.2269, + "step": 14304, + "teacher_loss": 0.20235010981559753 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.5119442939758301, + "learning_rate": 2.15532390504977e-05, + "loss": 0.2146, + "step": 14305, + "teacher_loss": 0.18153566122055054 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.6872402429580688, + "learning_rate": 2.155119594414007e-05, + "loss": 0.2661, + "step": 14306, + "teacher_loss": 0.2193007469177246 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.39235514402389526, + "learning_rate": 2.1549152687583158e-05, + "loss": 0.2693, + "step": 14307, + "teacher_loss": 0.2556533217430115 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.26030904054641724, + "learning_rate": 2.1547109280873808e-05, + "loss": 0.2881, + "step": 14308, + "teacher_loss": 0.2912067174911499 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 1.2868714332580566, + "learning_rate": 2.1545065724058873e-05, + "loss": 0.4348, + "step": 14309, + "teacher_loss": 0.3400779068470001 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.6231622099876404, + "learning_rate": 2.15430220171852e-05, + "loss": 0.2925, + "step": 14310, + "teacher_loss": 0.2557588815689087 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.3664616346359253, + "learning_rate": 2.1540978160299656e-05, + "loss": 0.1992, + "step": 14311, + "teacher_loss": 0.18063011765480042 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.6802225112915039, + "learning_rate": 2.153893415344909e-05, + "loss": 0.3152, + "step": 14312, + "teacher_loss": 0.27468788623809814 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.7269768714904785, + "learning_rate": 2.1536889996680366e-05, + "loss": 0.2458, + "step": 14313, + "teacher_loss": 0.19236302375793457 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.7794076204299927, + "learning_rate": 2.1534845690040356e-05, + "loss": 0.2817, + "step": 14314, + "teacher_loss": 0.22641941905021667 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.7687351703643799, + "learning_rate": 2.1532801233575926e-05, + "loss": 0.2954, + "step": 14315, + "teacher_loss": 0.24284933507442474 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.8595342636108398, + "learning_rate": 2.1530756627333953e-05, + "loss": 0.3432, + "step": 14316, + "teacher_loss": 0.28578054904937744 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.3514205813407898, + "learning_rate": 2.1528711871361305e-05, + "loss": 0.182, + "step": 14317, + "teacher_loss": 0.16313891112804413 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.15996265411376953, + "learning_rate": 2.1526666965704874e-05, + "loss": 0.1155, + "step": 14318, + "teacher_loss": 0.11055950820446014 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.3024246096611023, + "learning_rate": 2.152462191041153e-05, + "loss": 0.2081, + "step": 14319, + "teacher_loss": 0.19764773547649384 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.32902562618255615, + "learning_rate": 2.1522576705528173e-05, + "loss": 0.2606, + "step": 14320, + "teacher_loss": 0.25301098823547363 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.49471426010131836, + "learning_rate": 2.1520531351101685e-05, + "loss": 0.3111, + "step": 14321, + "teacher_loss": 0.29075026512145996 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 1.3565127849578857, + "learning_rate": 2.1518485847178965e-05, + "loss": 0.5074, + "step": 14322, + "teacher_loss": 0.413002073764801 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.3960000276565552, + "learning_rate": 2.1516440193806907e-05, + "loss": 0.3064, + "step": 14323, + "teacher_loss": 0.2964242398738861 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.4063524603843689, + "learning_rate": 2.1514394391032413e-05, + "loss": 0.2245, + "step": 14324, + "teacher_loss": 0.20426729321479797 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.32127660512924194, + "learning_rate": 2.151234843890239e-05, + "loss": 0.227, + "step": 14325, + "teacher_loss": 0.21650615334510803 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.7958909273147583, + "learning_rate": 2.151030233746374e-05, + "loss": 0.6925, + "step": 14326, + "teacher_loss": 0.6810251474380493 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.6001822352409363, + "learning_rate": 2.1508256086763372e-05, + "loss": 0.234, + "step": 14327, + "teacher_loss": 0.19330760836601257 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.4064871668815613, + "learning_rate": 2.1506209686848208e-05, + "loss": 0.3108, + "step": 14328, + "teacher_loss": 0.30015844106674194 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.1884167492389679, + "learning_rate": 2.1504163137765164e-05, + "loss": 0.2321, + "step": 14329, + "teacher_loss": 0.23699051141738892 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.29769617319107056, + "learning_rate": 2.1502116439561157e-05, + "loss": 0.2495, + "step": 14330, + "teacher_loss": 0.24416258931159973 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.409062922000885, + "learning_rate": 2.1500069592283116e-05, + "loss": 0.3013, + "step": 14331, + "teacher_loss": 0.28932878375053406 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.7875873446464539, + "learning_rate": 2.1498022595977965e-05, + "loss": 0.2724, + "step": 14332, + "teacher_loss": 0.2152034044265747 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.5639349222183228, + "learning_rate": 2.1495975450692642e-05, + "loss": 0.3083, + "step": 14333, + "teacher_loss": 0.2799008786678314 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.6440930366516113, + "learning_rate": 2.149392815647408e-05, + "loss": 0.2283, + "step": 14334, + "teacher_loss": 0.18214114010334015 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.5947770476341248, + "learning_rate": 2.149188071336921e-05, + "loss": 0.2426, + "step": 14335, + "teacher_loss": 0.20344088971614838 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.5486993193626404, + "learning_rate": 2.148983312142498e-05, + "loss": 0.3035, + "step": 14336, + "teacher_loss": 0.27622726559638977 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.3154642879962921, + "learning_rate": 2.1487785380688333e-05, + "loss": 0.2293, + "step": 14337, + "teacher_loss": 0.21974530816078186 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.34401002526283264, + "learning_rate": 2.1485737491206215e-05, + "loss": 0.2079, + "step": 14338, + "teacher_loss": 0.19275638461112976 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.7187472581863403, + "learning_rate": 2.1483689453025593e-05, + "loss": 0.4394, + "step": 14339, + "teacher_loss": 0.40831443667411804 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.473328560590744, + "learning_rate": 2.14816412661934e-05, + "loss": 0.3027, + "step": 14340, + "teacher_loss": 0.2837018370628357 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.07045336812734604, + "learning_rate": 2.147959293075661e-05, + "loss": 0.1856, + "step": 14341, + "teacher_loss": 0.1983906328678131 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.27863818407058716, + "learning_rate": 2.1477544446762184e-05, + "loss": 0.3661, + "step": 14342, + "teacher_loss": 0.375781774520874 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.48159411549568176, + "learning_rate": 2.147549581425708e-05, + "loss": 0.3009, + "step": 14343, + "teacher_loss": 0.28086531162261963 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.41333091259002686, + "learning_rate": 2.1473447033288275e-05, + "loss": 0.2607, + "step": 14344, + "teacher_loss": 0.24379006028175354 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.26726314425468445, + "learning_rate": 2.1471398103902733e-05, + "loss": 0.1857, + "step": 14345, + "teacher_loss": 0.17664334177970886 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.6123522520065308, + "learning_rate": 2.1469349026147438e-05, + "loss": 0.8066, + "step": 14346, + "teacher_loss": 0.828173041343689 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.4149326980113983, + "learning_rate": 2.1467299800069366e-05, + "loss": 0.226, + "step": 14347, + "teacher_loss": 0.20495428144931793 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.23196470737457275, + "learning_rate": 2.14652504257155e-05, + "loss": 0.1759, + "step": 14348, + "teacher_loss": 0.1696886122226715 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.192484050989151, + "learning_rate": 2.1463200903132825e-05, + "loss": 0.2272, + "step": 14349, + "teacher_loss": 0.231021910905838 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.699876070022583, + "learning_rate": 2.146115123236833e-05, + "loss": 0.2922, + "step": 14350, + "teacher_loss": 0.24689070880413055 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.4879429340362549, + "learning_rate": 2.1459101413469008e-05, + "loss": 0.3465, + "step": 14351, + "teacher_loss": 0.33080393075942993 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.32041695713996887, + "learning_rate": 2.1457051446481855e-05, + "loss": 0.2144, + "step": 14352, + "teacher_loss": 0.20266872644424438 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.5522844791412354, + "learning_rate": 2.1455001331453875e-05, + "loss": 0.295, + "step": 14353, + "teacher_loss": 0.2664240300655365 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.19967886805534363, + "learning_rate": 2.145295106843207e-05, + "loss": 0.2905, + "step": 14354, + "teacher_loss": 0.30054694414138794 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.4694608747959137, + "learning_rate": 2.1450900657463438e-05, + "loss": 0.2432, + "step": 14355, + "teacher_loss": 0.21810433268547058 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.3208380937576294, + "learning_rate": 2.1448850098594998e-05, + "loss": 0.3406, + "step": 14356, + "teacher_loss": 0.3427514433860779 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.5406547784805298, + "learning_rate": 2.144679939187376e-05, + "loss": 0.239, + "step": 14357, + "teacher_loss": 0.20547229051589966 + }, + { + "compression_loss": 0.0, + "epoch": 2.59, + "label_loss": 0.2883642315864563, + "learning_rate": 2.1444748537346737e-05, + "loss": 0.1977, + "step": 14358, + "teacher_loss": 0.1876707375049591 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.41519108414649963, + "learning_rate": 2.1442697535060957e-05, + "loss": 0.2762, + "step": 14359, + "teacher_loss": 0.26078787446022034 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.3476175367832184, + "learning_rate": 2.1440646385063436e-05, + "loss": 0.251, + "step": 14360, + "teacher_loss": 0.2402871698141098 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.2072194516658783, + "learning_rate": 2.1438595087401204e-05, + "loss": 0.1809, + "step": 14361, + "teacher_loss": 0.17793533205986023 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.30776265263557434, + "learning_rate": 2.1436543642121293e-05, + "loss": 0.2234, + "step": 14362, + "teacher_loss": 0.21404105424880981 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.6086610555648804, + "learning_rate": 2.1434492049270725e-05, + "loss": 0.6335, + "step": 14363, + "teacher_loss": 0.6362476944923401 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.4263724088668823, + "learning_rate": 2.143244030889656e-05, + "loss": 0.2586, + "step": 14364, + "teacher_loss": 0.24000096321105957 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.5887324213981628, + "learning_rate": 2.1430388421045812e-05, + "loss": 0.3727, + "step": 14365, + "teacher_loss": 0.3486897051334381 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.47745761275291443, + "learning_rate": 2.142833638576554e-05, + "loss": 0.2639, + "step": 14366, + "teacher_loss": 0.24022571742534637 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.8716928958892822, + "learning_rate": 2.1426284203102795e-05, + "loss": 0.3581, + "step": 14367, + "teacher_loss": 0.30102288722991943 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.7652832269668579, + "learning_rate": 2.1424231873104613e-05, + "loss": 0.6356, + "step": 14368, + "teacher_loss": 0.6212201714515686 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.595547080039978, + "learning_rate": 2.1422179395818058e-05, + "loss": 0.3469, + "step": 14369, + "teacher_loss": 0.3193088173866272 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.28941231966018677, + "learning_rate": 2.1420126771290183e-05, + "loss": 0.1996, + "step": 14370, + "teacher_loss": 0.18966497480869293 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.13828805088996887, + "learning_rate": 2.1418073999568047e-05, + "loss": 0.2127, + "step": 14371, + "teacher_loss": 0.22102117538452148 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.5491148829460144, + "learning_rate": 2.141602108069872e-05, + "loss": 0.2621, + "step": 14372, + "teacher_loss": 0.23024769127368927 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.4534647762775421, + "learning_rate": 2.1413968014729264e-05, + "loss": 0.3167, + "step": 14373, + "teacher_loss": 0.30152788758277893 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.30152928829193115, + "learning_rate": 2.1411914801706753e-05, + "loss": 0.3113, + "step": 14374, + "teacher_loss": 0.31239378452301025 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.33814698457717896, + "learning_rate": 2.1409861441678262e-05, + "loss": 0.2688, + "step": 14375, + "teacher_loss": 0.2611209452152252 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.9511691927909851, + "learning_rate": 2.1407807934690857e-05, + "loss": 0.3183, + "step": 14376, + "teacher_loss": 0.248009592294693 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.489962637424469, + "learning_rate": 2.1405754280791634e-05, + "loss": 0.2828, + "step": 14377, + "teacher_loss": 0.2598152458667755 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.9335925579071045, + "learning_rate": 2.1403700480027672e-05, + "loss": 0.4965, + "step": 14378, + "teacher_loss": 0.44798070192337036 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.5011628866195679, + "learning_rate": 2.1401646532446057e-05, + "loss": 0.2108, + "step": 14379, + "teacher_loss": 0.17855922877788544 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.22789734601974487, + "learning_rate": 2.139959243809388e-05, + "loss": 0.3926, + "step": 14380, + "teacher_loss": 0.4109404683113098 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.29149484634399414, + "learning_rate": 2.139753819701823e-05, + "loss": 0.1911, + "step": 14381, + "teacher_loss": 0.17990244925022125 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.36217203736305237, + "learning_rate": 2.1395483809266217e-05, + "loss": 0.2235, + "step": 14382, + "teacher_loss": 0.20811673998832703 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.3703482449054718, + "learning_rate": 2.1393429274884933e-05, + "loss": 0.1946, + "step": 14383, + "teacher_loss": 0.17502844333648682 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.5684225559234619, + "learning_rate": 2.1391374593921483e-05, + "loss": 0.3367, + "step": 14384, + "teacher_loss": 0.31091558933258057 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.44113093614578247, + "learning_rate": 2.1389319766422974e-05, + "loss": 0.2379, + "step": 14385, + "teacher_loss": 0.21526330709457397 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.39999252557754517, + "learning_rate": 2.138726479243652e-05, + "loss": 0.2324, + "step": 14386, + "teacher_loss": 0.2137613445520401 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.23954442143440247, + "learning_rate": 2.138520967200924e-05, + "loss": 0.2249, + "step": 14387, + "teacher_loss": 0.22324882447719574 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.5895223617553711, + "learning_rate": 2.1383154405188235e-05, + "loss": 0.2798, + "step": 14388, + "teacher_loss": 0.24540933966636658 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.6869639158248901, + "learning_rate": 2.1381098992020646e-05, + "loss": 0.5363, + "step": 14389, + "teacher_loss": 0.5195050835609436 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.4208378195762634, + "learning_rate": 2.1379043432553584e-05, + "loss": 0.2933, + "step": 14390, + "teacher_loss": 0.2790742516517639 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.7477906942367554, + "learning_rate": 2.1376987726834188e-05, + "loss": 0.3013, + "step": 14391, + "teacher_loss": 0.2517155408859253 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.6484609842300415, + "learning_rate": 2.1374931874909576e-05, + "loss": 0.3079, + "step": 14392, + "teacher_loss": 0.270017147064209 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.7592825889587402, + "learning_rate": 2.1372875876826892e-05, + "loss": 0.3786, + "step": 14393, + "teacher_loss": 0.33627286553382874 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.6592007875442505, + "learning_rate": 2.137081973263327e-05, + "loss": 0.384, + "step": 14394, + "teacher_loss": 0.3534213900566101 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.5886995792388916, + "learning_rate": 2.1368763442375852e-05, + "loss": 0.2917, + "step": 14395, + "teacher_loss": 0.2587509751319885 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.26648765802383423, + "learning_rate": 2.1366707006101784e-05, + "loss": 0.2018, + "step": 14396, + "teacher_loss": 0.19461414217948914 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.43760621547698975, + "learning_rate": 2.1364650423858216e-05, + "loss": 0.3037, + "step": 14397, + "teacher_loss": 0.28883352875709534 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.33827027678489685, + "learning_rate": 2.136259369569229e-05, + "loss": 0.2233, + "step": 14398, + "teacher_loss": 0.21052607893943787 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.4529958963394165, + "learning_rate": 2.1360536821651166e-05, + "loss": 0.1899, + "step": 14399, + "teacher_loss": 0.16067785024642944 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.4759179949760437, + "learning_rate": 2.135847980178201e-05, + "loss": 0.3405, + "step": 14400, + "teacher_loss": 0.3254011273384094 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.9083163738250732, + "learning_rate": 2.1356422636131963e-05, + "loss": 0.2323, + "step": 14401, + "teacher_loss": 0.15721768140792847 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.5911104083061218, + "learning_rate": 2.1354365324748218e-05, + "loss": 0.3112, + "step": 14402, + "teacher_loss": 0.2800961434841156 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.624241054058075, + "learning_rate": 2.135230786767792e-05, + "loss": 0.2939, + "step": 14403, + "teacher_loss": 0.2571490406990051 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.3971065282821655, + "learning_rate": 2.1350250264968245e-05, + "loss": 0.293, + "step": 14404, + "teacher_loss": 0.28142908215522766 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.7087382078170776, + "learning_rate": 2.1348192516666376e-05, + "loss": 0.4645, + "step": 14405, + "teacher_loss": 0.4373510479927063 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.37750405073165894, + "learning_rate": 2.134613462281948e-05, + "loss": 0.5131, + "step": 14406, + "teacher_loss": 0.5281796455383301 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.21675628423690796, + "learning_rate": 2.1344076583474748e-05, + "loss": 0.1771, + "step": 14407, + "teacher_loss": 0.17263884842395782 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.3920409083366394, + "learning_rate": 2.1342018398679363e-05, + "loss": 0.5068, + "step": 14408, + "teacher_loss": 0.5195322632789612 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.5038798451423645, + "learning_rate": 2.1339960068480503e-05, + "loss": 0.3722, + "step": 14409, + "teacher_loss": 0.3575171232223511 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.22464367747306824, + "learning_rate": 2.133790159292537e-05, + "loss": 0.2733, + "step": 14410, + "teacher_loss": 0.27870655059814453 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.600874125957489, + "learning_rate": 2.1335842972061158e-05, + "loss": 0.4638, + "step": 14411, + "teacher_loss": 0.4485885500907898 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.2844209671020508, + "learning_rate": 2.133378420593506e-05, + "loss": 0.1823, + "step": 14412, + "teacher_loss": 0.17092260718345642 + }, + { + "compression_loss": 0.0, + "epoch": 2.6, + "label_loss": 0.1765948385000229, + "learning_rate": 2.133172529459428e-05, + "loss": 0.2582, + "step": 14413, + "teacher_loss": 0.26729512214660645 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.26239120960235596, + "learning_rate": 2.1329666238086024e-05, + "loss": 0.2546, + "step": 14414, + "teacher_loss": 0.25374260544776917 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.19193045794963837, + "learning_rate": 2.1327607036457492e-05, + "loss": 0.2084, + "step": 14415, + "teacher_loss": 0.2101939618587494 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.6342462301254272, + "learning_rate": 2.132554768975591e-05, + "loss": 0.4105, + "step": 14416, + "teacher_loss": 0.3855966627597809 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.7185182571411133, + "learning_rate": 2.1323488198028484e-05, + "loss": 0.3025, + "step": 14417, + "teacher_loss": 0.2562815248966217 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.5414015054702759, + "learning_rate": 2.1321428561322428e-05, + "loss": 0.3128, + "step": 14418, + "teacher_loss": 0.2874165177345276 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.4086994230747223, + "learning_rate": 2.1319368779684972e-05, + "loss": 0.3196, + "step": 14419, + "teacher_loss": 0.30965232849121094 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.7419878840446472, + "learning_rate": 2.1317308853163333e-05, + "loss": 0.5899, + "step": 14420, + "teacher_loss": 0.5729924440383911 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.2743135690689087, + "learning_rate": 2.1315248781804743e-05, + "loss": 0.2124, + "step": 14421, + "teacher_loss": 0.20551703870296478 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.26606953144073486, + "learning_rate": 2.1313188565656434e-05, + "loss": 0.2481, + "step": 14422, + "teacher_loss": 0.24607792496681213 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.2285497933626175, + "learning_rate": 2.1311128204765634e-05, + "loss": 0.1854, + "step": 14423, + "teacher_loss": 0.18065476417541504 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.38702714443206787, + "learning_rate": 2.130906769917959e-05, + "loss": 0.2163, + "step": 14424, + "teacher_loss": 0.19730296730995178 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.5607089996337891, + "learning_rate": 2.1307007048945538e-05, + "loss": 0.3629, + "step": 14425, + "teacher_loss": 0.34096360206604004 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.39820170402526855, + "learning_rate": 2.1304946254110727e-05, + "loss": 0.2256, + "step": 14426, + "teacher_loss": 0.206430584192276 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.30117055773735046, + "learning_rate": 2.13028853147224e-05, + "loss": 0.2338, + "step": 14427, + "teacher_loss": 0.22631970047950745 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.4237770438194275, + "learning_rate": 2.1300824230827808e-05, + "loss": 0.262, + "step": 14428, + "teacher_loss": 0.24397261440753937 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.6387193202972412, + "learning_rate": 2.129876300247421e-05, + "loss": 0.4991, + "step": 14429, + "teacher_loss": 0.48361361026763916 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.3046054244041443, + "learning_rate": 2.129670162970886e-05, + "loss": 0.2585, + "step": 14430, + "teacher_loss": 0.2533457279205322 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.48005443811416626, + "learning_rate": 2.129464011257902e-05, + "loss": 0.3538, + "step": 14431, + "teacher_loss": 0.3398140072822571 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.8018385171890259, + "learning_rate": 2.1292578451131953e-05, + "loss": 0.344, + "step": 14432, + "teacher_loss": 0.29312971234321594 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.3150048553943634, + "learning_rate": 2.129051664541493e-05, + "loss": 0.2657, + "step": 14433, + "teacher_loss": 0.26026415824890137 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.6677534580230713, + "learning_rate": 2.1288454695475218e-05, + "loss": 0.5446, + "step": 14434, + "teacher_loss": 0.5309034585952759 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.37522852420806885, + "learning_rate": 2.1286392601360095e-05, + "loss": 0.2901, + "step": 14435, + "teacher_loss": 0.28069210052490234 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.527740478515625, + "learning_rate": 2.128433036311684e-05, + "loss": 0.2821, + "step": 14436, + "teacher_loss": 0.2548117935657501 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.5387645363807678, + "learning_rate": 2.128226798079273e-05, + "loss": 0.2881, + "step": 14437, + "teacher_loss": 0.2602214217185974 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.426969051361084, + "learning_rate": 2.1280205454435047e-05, + "loss": 0.2114, + "step": 14438, + "teacher_loss": 0.1874251365661621 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.2988869547843933, + "learning_rate": 2.1278142784091085e-05, + "loss": 0.271, + "step": 14439, + "teacher_loss": 0.2679510712623596 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.46239596605300903, + "learning_rate": 2.127607996980813e-05, + "loss": 0.3226, + "step": 14440, + "teacher_loss": 0.30709004402160645 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.19903071224689484, + "learning_rate": 2.127401701163348e-05, + "loss": 0.1719, + "step": 14441, + "teacher_loss": 0.16883979737758636 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.4708396792411804, + "learning_rate": 2.127195390961443e-05, + "loss": 0.3982, + "step": 14442, + "teacher_loss": 0.39010220766067505 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.5290124416351318, + "learning_rate": 2.126989066379828e-05, + "loss": 0.2699, + "step": 14443, + "teacher_loss": 0.24112248420715332 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.320599764585495, + "learning_rate": 2.1267827274232335e-05, + "loss": 0.2191, + "step": 14444, + "teacher_loss": 0.20787745714187622 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.4347943961620331, + "learning_rate": 2.12657637409639e-05, + "loss": 0.2518, + "step": 14445, + "teacher_loss": 0.23144616186618805 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.15413731336593628, + "learning_rate": 2.1263700064040293e-05, + "loss": 0.1727, + "step": 14446, + "teacher_loss": 0.17479299008846283 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 1.0684468746185303, + "learning_rate": 2.126163624350882e-05, + "loss": 0.3132, + "step": 14447, + "teacher_loss": 0.22930869460105896 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.7531291246414185, + "learning_rate": 2.12595722794168e-05, + "loss": 0.3471, + "step": 14448, + "teacher_loss": 0.3019852638244629 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.3335733413696289, + "learning_rate": 2.1257508171811563e-05, + "loss": 0.2978, + "step": 14449, + "teacher_loss": 0.29385241866111755 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.32621896266937256, + "learning_rate": 2.1255443920740417e-05, + "loss": 0.3307, + "step": 14450, + "teacher_loss": 0.3312102258205414 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.2610246241092682, + "learning_rate": 2.12533795262507e-05, + "loss": 0.3511, + "step": 14451, + "teacher_loss": 0.3610619902610779 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.3389078974723816, + "learning_rate": 2.125131498838974e-05, + "loss": 0.2537, + "step": 14452, + "teacher_loss": 0.24422556161880493 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.5795071125030518, + "learning_rate": 2.1249250307204864e-05, + "loss": 0.2266, + "step": 14453, + "teacher_loss": 0.1873396635055542 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.7731492519378662, + "learning_rate": 2.124718548274342e-05, + "loss": 0.3108, + "step": 14454, + "teacher_loss": 0.25940829515457153 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.34906116127967834, + "learning_rate": 2.1245120515052738e-05, + "loss": 0.29, + "step": 14455, + "teacher_loss": 0.2834164798259735 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.9481781721115112, + "learning_rate": 2.124305540418017e-05, + "loss": 0.2777, + "step": 14456, + "teacher_loss": 0.2032223343849182 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.43922561407089233, + "learning_rate": 2.124099015017306e-05, + "loss": 0.2405, + "step": 14457, + "teacher_loss": 0.2183646708726883 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.44619810581207275, + "learning_rate": 2.1238924753078755e-05, + "loss": 0.2945, + "step": 14458, + "teacher_loss": 0.2776389718055725 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.5715669393539429, + "learning_rate": 2.1236859212944613e-05, + "loss": 0.3972, + "step": 14459, + "teacher_loss": 0.377782940864563 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.6885632276535034, + "learning_rate": 2.123479352981799e-05, + "loss": 0.313, + "step": 14460, + "teacher_loss": 0.27130627632141113 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.4756855070590973, + "learning_rate": 2.123272770374624e-05, + "loss": 0.2834, + "step": 14461, + "teacher_loss": 0.2620348036289215 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.4813898205757141, + "learning_rate": 2.1230661734776728e-05, + "loss": 0.262, + "step": 14462, + "teacher_loss": 0.23762303590774536 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.8735138177871704, + "learning_rate": 2.122859562295683e-05, + "loss": 0.2217, + "step": 14463, + "teacher_loss": 0.14925727248191833 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.5166009664535522, + "learning_rate": 2.1226529368333904e-05, + "loss": 0.2449, + "step": 14464, + "teacher_loss": 0.21474343538284302 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.9103567600250244, + "learning_rate": 2.122446297095533e-05, + "loss": 0.3825, + "step": 14465, + "teacher_loss": 0.3238013982772827 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.23310214281082153, + "learning_rate": 2.122239643086848e-05, + "loss": 0.2248, + "step": 14466, + "teacher_loss": 0.22383025288581848 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.18237142264842987, + "learning_rate": 2.1220329748120736e-05, + "loss": 0.1698, + "step": 14467, + "teacher_loss": 0.16834893822669983 + }, + { + "compression_loss": 0.0, + "epoch": 2.61, + "label_loss": 0.2115219235420227, + "learning_rate": 2.1218262922759484e-05, + "loss": 0.172, + "step": 14468, + "teacher_loss": 0.16755372285842896 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.5679544806480408, + "learning_rate": 2.1216195954832098e-05, + "loss": 0.247, + "step": 14469, + "teacher_loss": 0.2113179713487625 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.6342524290084839, + "learning_rate": 2.1214128844385984e-05, + "loss": 0.3366, + "step": 14470, + "teacher_loss": 0.3035721778869629 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.42312175035476685, + "learning_rate": 2.1212061591468524e-05, + "loss": 0.2136, + "step": 14471, + "teacher_loss": 0.19028016924858093 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.5176512598991394, + "learning_rate": 2.1209994196127117e-05, + "loss": 0.5481, + "step": 14472, + "teacher_loss": 0.5514723062515259 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.39181992411613464, + "learning_rate": 2.120792665840916e-05, + "loss": 0.2419, + "step": 14473, + "teacher_loss": 0.2252657413482666 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 1.0661423206329346, + "learning_rate": 2.1205858978362056e-05, + "loss": 0.3835, + "step": 14474, + "teacher_loss": 0.3076481819152832 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.36675921082496643, + "learning_rate": 2.1203791156033217e-05, + "loss": 0.205, + "step": 14475, + "teacher_loss": 0.18701758980751038 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.3144836723804474, + "learning_rate": 2.1201723191470043e-05, + "loss": 0.2553, + "step": 14476, + "teacher_loss": 0.2487637847661972 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.41050562262535095, + "learning_rate": 2.1199655084719954e-05, + "loss": 0.2477, + "step": 14477, + "teacher_loss": 0.22958195209503174 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.34472328424453735, + "learning_rate": 2.1197586835830354e-05, + "loss": 0.3646, + "step": 14478, + "teacher_loss": 0.36683762073516846 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.3256007432937622, + "learning_rate": 2.1195518444848673e-05, + "loss": 0.1769, + "step": 14479, + "teacher_loss": 0.16036507487297058 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.4364761710166931, + "learning_rate": 2.1193449911822328e-05, + "loss": 0.2845, + "step": 14480, + "teacher_loss": 0.2676158547401428 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.4624486565589905, + "learning_rate": 2.1191381236798746e-05, + "loss": 0.2137, + "step": 14481, + "teacher_loss": 0.1860925257205963 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.4825872778892517, + "learning_rate": 2.1189312419825358e-05, + "loss": 0.2201, + "step": 14482, + "teacher_loss": 0.1909511685371399 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.35513556003570557, + "learning_rate": 2.118724346094959e-05, + "loss": 0.2044, + "step": 14483, + "teacher_loss": 0.1876668483018875 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.2631889879703522, + "learning_rate": 2.118517436021888e-05, + "loss": 0.1785, + "step": 14484, + "teacher_loss": 0.16910137236118317 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.7301787734031677, + "learning_rate": 2.1183105117680667e-05, + "loss": 0.3538, + "step": 14485, + "teacher_loss": 0.31199949979782104 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.31994932889938354, + "learning_rate": 2.1181035733382388e-05, + "loss": 0.2923, + "step": 14486, + "teacher_loss": 0.2892826795578003 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.22506609559059143, + "learning_rate": 2.1178966207371496e-05, + "loss": 0.1481, + "step": 14487, + "teacher_loss": 0.13950274884700775 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.3799022436141968, + "learning_rate": 2.1176896539695434e-05, + "loss": 0.4455, + "step": 14488, + "teacher_loss": 0.45276570320129395 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.5600171685218811, + "learning_rate": 2.1174826730401647e-05, + "loss": 0.3911, + "step": 14489, + "teacher_loss": 0.3723044991493225 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.19089347124099731, + "learning_rate": 2.1172756779537606e-05, + "loss": 0.2136, + "step": 14490, + "teacher_loss": 0.21609412133693695 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.30896735191345215, + "learning_rate": 2.117068668715075e-05, + "loss": 0.2817, + "step": 14491, + "teacher_loss": 0.27861863374710083 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.5656956434249878, + "learning_rate": 2.1168616453288557e-05, + "loss": 0.3747, + "step": 14492, + "teacher_loss": 0.353436142206192 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.5164735317230225, + "learning_rate": 2.1166546077998477e-05, + "loss": 0.317, + "step": 14493, + "teacher_loss": 0.29486221075057983 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.32413625717163086, + "learning_rate": 2.1164475561327984e-05, + "loss": 0.2263, + "step": 14494, + "teacher_loss": 0.21547305583953857 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.28341031074523926, + "learning_rate": 2.1162404903324558e-05, + "loss": 0.2358, + "step": 14495, + "teacher_loss": 0.23055371642112732 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.2807903289794922, + "learning_rate": 2.1160334104035654e-05, + "loss": 0.211, + "step": 14496, + "teacher_loss": 0.2032170295715332 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.4560181498527527, + "learning_rate": 2.115826316350876e-05, + "loss": 0.2071, + "step": 14497, + "teacher_loss": 0.17938853800296783 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.44641512632369995, + "learning_rate": 2.1156192081791355e-05, + "loss": 0.4098, + "step": 14498, + "teacher_loss": 0.4057130217552185 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.4787118434906006, + "learning_rate": 2.115412085893092e-05, + "loss": 0.2293, + "step": 14499, + "teacher_loss": 0.2015482783317566 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.5047792792320251, + "learning_rate": 2.115204949497495e-05, + "loss": 0.2904, + "step": 14500, + "teacher_loss": 0.2665749192237854 + }, + { + "epoch": 2.62, + "eval_exact_match": 79.51750236518448, + "eval_f1": 86.93928215304093, + "step": 14500 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.5995454788208008, + "learning_rate": 2.1149977989970926e-05, + "loss": 0.2962, + "step": 14501, + "teacher_loss": 0.26248687505722046 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.6062762141227722, + "learning_rate": 2.114790634396635e-05, + "loss": 0.2912, + "step": 14502, + "teacher_loss": 0.2562112808227539 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.6573885679244995, + "learning_rate": 2.114583455700871e-05, + "loss": 0.3305, + "step": 14503, + "teacher_loss": 0.2941551208496094 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.37098604440689087, + "learning_rate": 2.114376262914551e-05, + "loss": 0.1917, + "step": 14504, + "teacher_loss": 0.1717677116394043 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.5347769856452942, + "learning_rate": 2.1141690560424253e-05, + "loss": 0.2743, + "step": 14505, + "teacher_loss": 0.2453932762145996 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.45043063163757324, + "learning_rate": 2.1139618350892447e-05, + "loss": 0.3166, + "step": 14506, + "teacher_loss": 0.3017372488975525 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.3075307011604309, + "learning_rate": 2.1137546000597603e-05, + "loss": 0.2254, + "step": 14507, + "teacher_loss": 0.21629445254802704 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.3495030403137207, + "learning_rate": 2.1135473509587222e-05, + "loss": 0.2623, + "step": 14508, + "teacher_loss": 0.2525894045829773 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.8060945272445679, + "learning_rate": 2.1133400877908833e-05, + "loss": 0.3409, + "step": 14509, + "teacher_loss": 0.28916603326797485 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.6402349472045898, + "learning_rate": 2.1131328105609954e-05, + "loss": 0.2651, + "step": 14510, + "teacher_loss": 0.2234615683555603 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.6258589029312134, + "learning_rate": 2.1129255192738096e-05, + "loss": 0.3852, + "step": 14511, + "teacher_loss": 0.3584093451499939 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.10898482799530029, + "learning_rate": 2.11271821393408e-05, + "loss": 0.2298, + "step": 14512, + "teacher_loss": 0.24322909116744995 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.3679760992527008, + "learning_rate": 2.1125108945465583e-05, + "loss": 0.2313, + "step": 14513, + "teacher_loss": 0.21611681580543518 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.5516211986541748, + "learning_rate": 2.1123035611159984e-05, + "loss": 0.2161, + "step": 14514, + "teacher_loss": 0.17886114120483398 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.1856776773929596, + "learning_rate": 2.112096213647154e-05, + "loss": 0.1809, + "step": 14515, + "teacher_loss": 0.1804029643535614 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.6840387582778931, + "learning_rate": 2.111888852144778e-05, + "loss": 0.2634, + "step": 14516, + "teacher_loss": 0.21670308709144592 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.7231588959693909, + "learning_rate": 2.111681476613625e-05, + "loss": 0.3998, + "step": 14517, + "teacher_loss": 0.36390289664268494 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.4352991580963135, + "learning_rate": 2.1114740870584504e-05, + "loss": 0.2345, + "step": 14518, + "teacher_loss": 0.21220329403877258 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.5919502973556519, + "learning_rate": 2.111266683484008e-05, + "loss": 0.2939, + "step": 14519, + "teacher_loss": 0.26076579093933105 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.5072752833366394, + "learning_rate": 2.1110592658950534e-05, + "loss": 0.2557, + "step": 14520, + "teacher_loss": 0.22772987186908722 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.29583653807640076, + "learning_rate": 2.1108518342963412e-05, + "loss": 0.2504, + "step": 14521, + "teacher_loss": 0.24532687664031982 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.4957273006439209, + "learning_rate": 2.1106443886926288e-05, + "loss": 0.2931, + "step": 14522, + "teacher_loss": 0.2705777585506439 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.34643977880477905, + "learning_rate": 2.110436929088671e-05, + "loss": 0.1677, + "step": 14523, + "teacher_loss": 0.14782464504241943 + }, + { + "compression_loss": 0.0, + "epoch": 2.62, + "label_loss": 0.28452128171920776, + "learning_rate": 2.110229455489224e-05, + "loss": 0.2392, + "step": 14524, + "teacher_loss": 0.23421666026115417 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.7480511665344238, + "learning_rate": 2.1100219678990457e-05, + "loss": 0.4178, + "step": 14525, + "teacher_loss": 0.3811163902282715 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.31360793113708496, + "learning_rate": 2.1098144663228923e-05, + "loss": 0.2742, + "step": 14526, + "teacher_loss": 0.26982730627059937 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.8070668578147888, + "learning_rate": 2.109606950765522e-05, + "loss": 0.3356, + "step": 14527, + "teacher_loss": 0.28317731618881226 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.3485603630542755, + "learning_rate": 2.1093994212316918e-05, + "loss": 0.2649, + "step": 14528, + "teacher_loss": 0.2555796802043915 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.4407389760017395, + "learning_rate": 2.1091918777261596e-05, + "loss": 0.3781, + "step": 14529, + "teacher_loss": 0.371107816696167 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.3816796839237213, + "learning_rate": 2.1089843202536847e-05, + "loss": 0.4507, + "step": 14530, + "teacher_loss": 0.458349347114563 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.4882408678531647, + "learning_rate": 2.108776748819025e-05, + "loss": 0.4083, + "step": 14531, + "teacher_loss": 0.3994430899620056 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.3758133053779602, + "learning_rate": 2.1085691634269397e-05, + "loss": 0.2774, + "step": 14532, + "teacher_loss": 0.266434907913208 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.7965704202651978, + "learning_rate": 2.1083615640821875e-05, + "loss": 0.348, + "step": 14533, + "teacher_loss": 0.29811179637908936 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.3647003769874573, + "learning_rate": 2.108153950789529e-05, + "loss": 0.2781, + "step": 14534, + "teacher_loss": 0.2685237526893616 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.8953534960746765, + "learning_rate": 2.107946323553724e-05, + "loss": 0.3007, + "step": 14535, + "teacher_loss": 0.23466211557388306 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.26077595353126526, + "learning_rate": 2.107738682379532e-05, + "loss": 0.1888, + "step": 14536, + "teacher_loss": 0.18084746599197388 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.9403353333473206, + "learning_rate": 2.1075310272717147e-05, + "loss": 0.7519, + "step": 14537, + "teacher_loss": 0.730973482131958 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.32168591022491455, + "learning_rate": 2.107323358235032e-05, + "loss": 0.2376, + "step": 14538, + "teacher_loss": 0.2282719612121582 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.5695239901542664, + "learning_rate": 2.1071156752742455e-05, + "loss": 0.2445, + "step": 14539, + "teacher_loss": 0.20838084816932678 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.1920047402381897, + "learning_rate": 2.106907978394117e-05, + "loss": 0.1709, + "step": 14540, + "teacher_loss": 0.1685151755809784 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.5061135292053223, + "learning_rate": 2.1067002675994077e-05, + "loss": 0.28, + "step": 14541, + "teacher_loss": 0.25492262840270996 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.653713047504425, + "learning_rate": 2.1064925428948808e-05, + "loss": 0.339, + "step": 14542, + "teacher_loss": 0.30398696660995483 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.44536933302879333, + "learning_rate": 2.106284804285298e-05, + "loss": 0.3035, + "step": 14543, + "teacher_loss": 0.2876836955547333 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.6877913475036621, + "learning_rate": 2.1060770517754223e-05, + "loss": 0.5858, + "step": 14544, + "teacher_loss": 0.5744898319244385 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.40376752614974976, + "learning_rate": 2.1058692853700174e-05, + "loss": 0.2394, + "step": 14545, + "teacher_loss": 0.22113852202892303 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.36561787128448486, + "learning_rate": 2.1056615050738458e-05, + "loss": 0.2141, + "step": 14546, + "teacher_loss": 0.19726331532001495 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.27502134442329407, + "learning_rate": 2.105453710891672e-05, + "loss": 0.2366, + "step": 14547, + "teacher_loss": 0.23234912753105164 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.31792324781417847, + "learning_rate": 2.10524590282826e-05, + "loss": 0.2436, + "step": 14548, + "teacher_loss": 0.23531028628349304 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.35475093126296997, + "learning_rate": 2.1050380808883735e-05, + "loss": 0.2742, + "step": 14549, + "teacher_loss": 0.265228271484375 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.38412511348724365, + "learning_rate": 2.1048302450767785e-05, + "loss": 0.1854, + "step": 14550, + "teacher_loss": 0.16329364478588104 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.3182380795478821, + "learning_rate": 2.1046223953982385e-05, + "loss": 0.2889, + "step": 14551, + "teacher_loss": 0.28558987379074097 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.6259177923202515, + "learning_rate": 2.1044145318575204e-05, + "loss": 0.5042, + "step": 14552, + "teacher_loss": 0.490645170211792 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.8444479703903198, + "learning_rate": 2.1042066544593894e-05, + "loss": 0.5037, + "step": 14553, + "teacher_loss": 0.4658720791339874 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.5971534252166748, + "learning_rate": 2.1039987632086105e-05, + "loss": 0.2396, + "step": 14554, + "teacher_loss": 0.19987118244171143 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.33437925577163696, + "learning_rate": 2.1037908581099512e-05, + "loss": 0.2993, + "step": 14555, + "teacher_loss": 0.2954440414905548 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.6378849148750305, + "learning_rate": 2.1035829391681783e-05, + "loss": 0.2156, + "step": 14556, + "teacher_loss": 0.16867297887802124 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.5989898443222046, + "learning_rate": 2.1033750063880577e-05, + "loss": 0.343, + "step": 14557, + "teacher_loss": 0.31458014249801636 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.27151185274124146, + "learning_rate": 2.1031670597743574e-05, + "loss": 0.2725, + "step": 14558, + "teacher_loss": 0.27260616421699524 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.44433945417404175, + "learning_rate": 2.1029590993318446e-05, + "loss": 0.224, + "step": 14559, + "teacher_loss": 0.19949209690093994 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.7775967717170715, + "learning_rate": 2.1027511250652877e-05, + "loss": 0.272, + "step": 14560, + "teacher_loss": 0.21585991978645325 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.6937455534934998, + "learning_rate": 2.1025431369794546e-05, + "loss": 0.248, + "step": 14561, + "teacher_loss": 0.1985187530517578 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.9760698080062866, + "learning_rate": 2.1023351350791138e-05, + "loss": 0.3725, + "step": 14562, + "teacher_loss": 0.30542656779289246 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.27515149116516113, + "learning_rate": 2.102127119369034e-05, + "loss": 0.2038, + "step": 14563, + "teacher_loss": 0.19581684470176697 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.3918849229812622, + "learning_rate": 2.1019190898539845e-05, + "loss": 0.2113, + "step": 14564, + "teacher_loss": 0.19118145108222961 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.2730717360973358, + "learning_rate": 2.1017110465387355e-05, + "loss": 0.2424, + "step": 14565, + "teacher_loss": 0.23895448446273804 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.3024517297744751, + "learning_rate": 2.1015029894280557e-05, + "loss": 0.2226, + "step": 14566, + "teacher_loss": 0.21374335885047913 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.29027271270751953, + "learning_rate": 2.1012949185267164e-05, + "loss": 0.1594, + "step": 14567, + "teacher_loss": 0.14490941166877747 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.3681783080101013, + "learning_rate": 2.1010868338394868e-05, + "loss": 0.2367, + "step": 14568, + "teacher_loss": 0.22213119268417358 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.47246965765953064, + "learning_rate": 2.1008787353711386e-05, + "loss": 0.2155, + "step": 14569, + "teacher_loss": 0.1869097352027893 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.7064085006713867, + "learning_rate": 2.1006706231264426e-05, + "loss": 0.4949, + "step": 14570, + "teacher_loss": 0.47139036655426025 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.1997348666191101, + "learning_rate": 2.1004624971101696e-05, + "loss": 0.2698, + "step": 14571, + "teacher_loss": 0.27758127450942993 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.2163429707288742, + "learning_rate": 2.1002543573270925e-05, + "loss": 0.2067, + "step": 14572, + "teacher_loss": 0.20557327568531036 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.7282078266143799, + "learning_rate": 2.1000462037819824e-05, + "loss": 0.336, + "step": 14573, + "teacher_loss": 0.29239726066589355 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.5752413272857666, + "learning_rate": 2.0998380364796112e-05, + "loss": 0.277, + "step": 14574, + "teacher_loss": 0.24383561313152313 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.7550591230392456, + "learning_rate": 2.0996298554247534e-05, + "loss": 0.4287, + "step": 14575, + "teacher_loss": 0.39241844415664673 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.7636871337890625, + "learning_rate": 2.09942166062218e-05, + "loss": 0.3412, + "step": 14576, + "teacher_loss": 0.2942371666431427 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.20371948182582855, + "learning_rate": 2.0992134520766652e-05, + "loss": 0.1594, + "step": 14577, + "teacher_loss": 0.15448534488677979 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.4729740023612976, + "learning_rate": 2.0990052297929825e-05, + "loss": 0.2799, + "step": 14578, + "teacher_loss": 0.2584296464920044 + }, + { + "compression_loss": 0.0, + "epoch": 2.63, + "label_loss": 0.450888454914093, + "learning_rate": 2.0987969937759058e-05, + "loss": 0.2321, + "step": 14579, + "teacher_loss": 0.20781373977661133 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.3802034854888916, + "learning_rate": 2.0985887440302098e-05, + "loss": 0.2746, + "step": 14580, + "teacher_loss": 0.2629126012325287 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.2989276945590973, + "learning_rate": 2.0983804805606678e-05, + "loss": 0.1827, + "step": 14581, + "teacher_loss": 0.16980625689029694 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.5499764084815979, + "learning_rate": 2.098172203372056e-05, + "loss": 0.2976, + "step": 14582, + "teacher_loss": 0.26956692337989807 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.5096302032470703, + "learning_rate": 2.0979639124691488e-05, + "loss": 0.2996, + "step": 14583, + "teacher_loss": 0.27626073360443115 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.5435378551483154, + "learning_rate": 2.0977556078567215e-05, + "loss": 0.2684, + "step": 14584, + "teacher_loss": 0.23783159255981445 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.14881932735443115, + "learning_rate": 2.097547289539551e-05, + "loss": 0.2124, + "step": 14585, + "teacher_loss": 0.21947932243347168 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.1609226018190384, + "learning_rate": 2.097338957522412e-05, + "loss": 0.2814, + "step": 14586, + "teacher_loss": 0.2948067784309387 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.6556715965270996, + "learning_rate": 2.0971306118100818e-05, + "loss": 0.2885, + "step": 14587, + "teacher_loss": 0.2477402687072754 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.38650113344192505, + "learning_rate": 2.096922252407337e-05, + "loss": 0.169, + "step": 14588, + "teacher_loss": 0.14481377601623535 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.3359062075614929, + "learning_rate": 2.0967138793189548e-05, + "loss": 0.2475, + "step": 14589, + "teacher_loss": 0.23768089711666107 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.19188524782657623, + "learning_rate": 2.0965054925497124e-05, + "loss": 0.1734, + "step": 14590, + "teacher_loss": 0.17137299478054047 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.482607901096344, + "learning_rate": 2.0962970921043874e-05, + "loss": 0.24, + "step": 14591, + "teacher_loss": 0.2130959928035736 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.6315637826919556, + "learning_rate": 2.0960886779877576e-05, + "loss": 0.3931, + "step": 14592, + "teacher_loss": 0.36656075716018677 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.4184904396533966, + "learning_rate": 2.095880250204602e-05, + "loss": 0.265, + "step": 14593, + "teacher_loss": 0.24792422354221344 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.21091127395629883, + "learning_rate": 2.0956718087596984e-05, + "loss": 0.1692, + "step": 14594, + "teacher_loss": 0.16451403498649597 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.3134652376174927, + "learning_rate": 2.0954633536578267e-05, + "loss": 0.2231, + "step": 14595, + "teacher_loss": 0.2130478173494339 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.7868915796279907, + "learning_rate": 2.0952548849037648e-05, + "loss": 0.3979, + "step": 14596, + "teacher_loss": 0.3546677827835083 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.7050416469573975, + "learning_rate": 2.0950464025022936e-05, + "loss": 0.3306, + "step": 14597, + "teacher_loss": 0.28899216651916504 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.3890405297279358, + "learning_rate": 2.0948379064581926e-05, + "loss": 0.2586, + "step": 14598, + "teacher_loss": 0.24412935972213745 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.4515721797943115, + "learning_rate": 2.0946293967762414e-05, + "loss": 0.1825, + "step": 14599, + "teacher_loss": 0.15259262919425964 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.49622777104377747, + "learning_rate": 2.0944208734612214e-05, + "loss": 0.3378, + "step": 14600, + "teacher_loss": 0.32023149728775024 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.27318406105041504, + "learning_rate": 2.0942123365179123e-05, + "loss": 0.2716, + "step": 14601, + "teacher_loss": 0.27146002650260925 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.33356279134750366, + "learning_rate": 2.0940037859510963e-05, + "loss": 0.2879, + "step": 14602, + "teacher_loss": 0.2828671336174011 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 1.2546168565750122, + "learning_rate": 2.093795221765554e-05, + "loss": 0.4395, + "step": 14603, + "teacher_loss": 0.3489788770675659 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.19852034747600555, + "learning_rate": 2.093586643966068e-05, + "loss": 0.2417, + "step": 14604, + "teacher_loss": 0.24644528329372406 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.6874034404754639, + "learning_rate": 2.09337805255742e-05, + "loss": 0.3272, + "step": 14605, + "teacher_loss": 0.28713172674179077 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.3663751482963562, + "learning_rate": 2.093169447544392e-05, + "loss": 0.2996, + "step": 14606, + "teacher_loss": 0.2922108471393585 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.7669240236282349, + "learning_rate": 2.0929608289317668e-05, + "loss": 0.4997, + "step": 14607, + "teacher_loss": 0.4699811339378357 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.5025430917739868, + "learning_rate": 2.0927521967243277e-05, + "loss": 0.3428, + "step": 14608, + "teacher_loss": 0.3250804543495178 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.29807913303375244, + "learning_rate": 2.092543550926858e-05, + "loss": 0.2532, + "step": 14609, + "teacher_loss": 0.2481934130191803 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.2336660623550415, + "learning_rate": 2.0923348915441413e-05, + "loss": 0.181, + "step": 14610, + "teacher_loss": 0.1751883625984192 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.5441685318946838, + "learning_rate": 2.0921262185809615e-05, + "loss": 0.2548, + "step": 14611, + "teacher_loss": 0.22260916233062744 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.1980355679988861, + "learning_rate": 2.0919175320421023e-05, + "loss": 0.2452, + "step": 14612, + "teacher_loss": 0.2504881024360657 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.15502992272377014, + "learning_rate": 2.0917088319323497e-05, + "loss": 0.1693, + "step": 14613, + "teacher_loss": 0.17090265452861786 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.3092743754386902, + "learning_rate": 2.0915001182564865e-05, + "loss": 0.3307, + "step": 14614, + "teacher_loss": 0.3331238627433777 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.450814425945282, + "learning_rate": 2.0912913910192996e-05, + "loss": 0.209, + "step": 14615, + "teacher_loss": 0.1821650266647339 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.1796136498451233, + "learning_rate": 2.091082650225574e-05, + "loss": 0.1643, + "step": 14616, + "teacher_loss": 0.16258826851844788 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.5932972431182861, + "learning_rate": 2.090873895880095e-05, + "loss": 0.317, + "step": 14617, + "teacher_loss": 0.28626346588134766 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.6091727018356323, + "learning_rate": 2.090665127987649e-05, + "loss": 0.3711, + "step": 14618, + "teacher_loss": 0.3446587324142456 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.640316367149353, + "learning_rate": 2.090456346553023e-05, + "loss": 0.478, + "step": 14619, + "teacher_loss": 0.4599878787994385 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.27549922466278076, + "learning_rate": 2.0902475515810034e-05, + "loss": 0.2438, + "step": 14620, + "teacher_loss": 0.24026665091514587 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.18280774354934692, + "learning_rate": 2.0900387430763767e-05, + "loss": 0.2057, + "step": 14621, + "teacher_loss": 0.20828525722026825 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 1.0501079559326172, + "learning_rate": 2.0898299210439305e-05, + "loss": 0.4427, + "step": 14622, + "teacher_loss": 0.37516582012176514 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.46030622720718384, + "learning_rate": 2.0896210854884528e-05, + "loss": 0.2721, + "step": 14623, + "teacher_loss": 0.2511984705924988 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.2684404253959656, + "learning_rate": 2.0894122364147315e-05, + "loss": 0.2386, + "step": 14624, + "teacher_loss": 0.23532560467720032 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.6694341897964478, + "learning_rate": 2.0892033738275553e-05, + "loss": 0.2312, + "step": 14625, + "teacher_loss": 0.18255293369293213 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.4191741347312927, + "learning_rate": 2.0889944977317113e-05, + "loss": 0.2634, + "step": 14626, + "teacher_loss": 0.2460833042860031 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 1.0263479948043823, + "learning_rate": 2.0887856081319896e-05, + "loss": 0.4365, + "step": 14627, + "teacher_loss": 0.37095707654953003 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.801518976688385, + "learning_rate": 2.0885767050331794e-05, + "loss": 0.3814, + "step": 14628, + "teacher_loss": 0.3347224295139313 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.5373536944389343, + "learning_rate": 2.0883677884400695e-05, + "loss": 0.225, + "step": 14629, + "teacher_loss": 0.190243199467659 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.563356876373291, + "learning_rate": 2.088158858357451e-05, + "loss": 0.3466, + "step": 14630, + "teacher_loss": 0.3224976062774658 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.24259087443351746, + "learning_rate": 2.0879499147901127e-05, + "loss": 0.2304, + "step": 14631, + "teacher_loss": 0.22904127836227417 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.4866694211959839, + "learning_rate": 2.0877409577428458e-05, + "loss": 0.3811, + "step": 14632, + "teacher_loss": 0.36938101053237915 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.23844614624977112, + "learning_rate": 2.087531987220441e-05, + "loss": 0.1762, + "step": 14633, + "teacher_loss": 0.16927990317344666 + }, + { + "compression_loss": 0.0, + "epoch": 2.64, + "label_loss": 0.39554959535598755, + "learning_rate": 2.087323003227689e-05, + "loss": 0.2579, + "step": 14634, + "teacher_loss": 0.24259623885154724 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.35707932710647583, + "learning_rate": 2.087114005769382e-05, + "loss": 0.2258, + "step": 14635, + "teacher_loss": 0.21115869283676147 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.7241878509521484, + "learning_rate": 2.08690499485031e-05, + "loss": 0.3516, + "step": 14636, + "teacher_loss": 0.31020867824554443 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.10791706293821335, + "learning_rate": 2.086695970475267e-05, + "loss": 0.2177, + "step": 14637, + "teacher_loss": 0.22994467616081238 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.5874520540237427, + "learning_rate": 2.086486932649044e-05, + "loss": 0.3201, + "step": 14638, + "teacher_loss": 0.29044651985168457 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.4429684281349182, + "learning_rate": 2.0862778813764342e-05, + "loss": 0.26, + "step": 14639, + "teacher_loss": 0.2396511733531952 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.9647077322006226, + "learning_rate": 2.0860688166622306e-05, + "loss": 0.2707, + "step": 14640, + "teacher_loss": 0.19361506402492523 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 1.0816384553909302, + "learning_rate": 2.085859738511226e-05, + "loss": 0.7439, + "step": 14641, + "teacher_loss": 0.7063202857971191 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.5294765830039978, + "learning_rate": 2.0856506469282134e-05, + "loss": 0.2488, + "step": 14642, + "teacher_loss": 0.217629075050354 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.5293600559234619, + "learning_rate": 2.0854415419179884e-05, + "loss": 0.2878, + "step": 14643, + "teacher_loss": 0.2610046863555908 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.5124907493591309, + "learning_rate": 2.0852324234853436e-05, + "loss": 0.2466, + "step": 14644, + "teacher_loss": 0.21706527471542358 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.24402692914009094, + "learning_rate": 2.0850232916350735e-05, + "loss": 0.1897, + "step": 14645, + "teacher_loss": 0.1836518943309784 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.23504573106765747, + "learning_rate": 2.084814146371974e-05, + "loss": 0.2255, + "step": 14646, + "teacher_loss": 0.2243957817554474 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.5070377588272095, + "learning_rate": 2.084604987700839e-05, + "loss": 0.2814, + "step": 14647, + "teacher_loss": 0.256369411945343 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.7381858825683594, + "learning_rate": 2.0843958156264647e-05, + "loss": 0.3674, + "step": 14648, + "teacher_loss": 0.32615670561790466 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.2413741797208786, + "learning_rate": 2.0841866301536463e-05, + "loss": 0.2452, + "step": 14649, + "teacher_loss": 0.24560299515724182 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.3306029438972473, + "learning_rate": 2.08397743128718e-05, + "loss": 0.2428, + "step": 14650, + "teacher_loss": 0.23300690948963165 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.7581703066825867, + "learning_rate": 2.0837682190318626e-05, + "loss": 0.3801, + "step": 14651, + "teacher_loss": 0.3380867838859558 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.5163165330886841, + "learning_rate": 2.083558993392489e-05, + "loss": 0.2548, + "step": 14652, + "teacher_loss": 0.22577707469463348 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.375863254070282, + "learning_rate": 2.083349754373858e-05, + "loss": 0.1938, + "step": 14653, + "teacher_loss": 0.17356109619140625 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.24171626567840576, + "learning_rate": 2.0831405019807664e-05, + "loss": 0.2003, + "step": 14654, + "teacher_loss": 0.19567041099071503 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 1.1167583465576172, + "learning_rate": 2.082931236218011e-05, + "loss": 0.619, + "step": 14655, + "teacher_loss": 0.5636433362960815 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.7422667145729065, + "learning_rate": 2.0827219570903898e-05, + "loss": 0.4879, + "step": 14656, + "teacher_loss": 0.4596354365348816 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.6334754824638367, + "learning_rate": 2.0825126646027017e-05, + "loss": 0.2474, + "step": 14657, + "teacher_loss": 0.20449486374855042 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.415048748254776, + "learning_rate": 2.0823033587597446e-05, + "loss": 0.2228, + "step": 14658, + "teacher_loss": 0.2014315128326416 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.2275198996067047, + "learning_rate": 2.082094039566317e-05, + "loss": 0.1891, + "step": 14659, + "teacher_loss": 0.1848609745502472 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.8425917625427246, + "learning_rate": 2.0818847070272188e-05, + "loss": 0.3965, + "step": 14660, + "teacher_loss": 0.34697920083999634 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.22775152325630188, + "learning_rate": 2.0816753611472487e-05, + "loss": 0.2498, + "step": 14661, + "teacher_loss": 0.252196341753006 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.28745514154434204, + "learning_rate": 2.081466001931206e-05, + "loss": 0.2238, + "step": 14662, + "teacher_loss": 0.21674004197120667 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.5068768262863159, + "learning_rate": 2.081256629383892e-05, + "loss": 0.2716, + "step": 14663, + "teacher_loss": 0.24546386301517487 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 1.4280047416687012, + "learning_rate": 2.0810472435101052e-05, + "loss": 0.3688, + "step": 14664, + "teacher_loss": 0.25105544924736023 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.2577989101409912, + "learning_rate": 2.080837844314648e-05, + "loss": 0.1955, + "step": 14665, + "teacher_loss": 0.188523069024086 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.27874815464019775, + "learning_rate": 2.0806284318023203e-05, + "loss": 0.1641, + "step": 14666, + "teacher_loss": 0.15139323472976685 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.6849238872528076, + "learning_rate": 2.0804190059779227e-05, + "loss": 0.2678, + "step": 14667, + "teacher_loss": 0.22150607407093048 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.98930823802948, + "learning_rate": 2.080209566846259e-05, + "loss": 0.3024, + "step": 14668, + "teacher_loss": 0.2260798215866089 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.5425367951393127, + "learning_rate": 2.0800001144121284e-05, + "loss": 0.3573, + "step": 14669, + "teacher_loss": 0.33671891689300537 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.11649313569068909, + "learning_rate": 2.0797906486803343e-05, + "loss": 0.2412, + "step": 14670, + "teacher_loss": 0.2551063001155853 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.4284122586250305, + "learning_rate": 2.079581169655679e-05, + "loss": 0.2422, + "step": 14671, + "teacher_loss": 0.22154012322425842 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.402354896068573, + "learning_rate": 2.079371677342965e-05, + "loss": 0.2287, + "step": 14672, + "teacher_loss": 0.209381103515625 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.24666452407836914, + "learning_rate": 2.079162171746996e-05, + "loss": 0.1819, + "step": 14673, + "teacher_loss": 0.1746814250946045 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 1.0770916938781738, + "learning_rate": 2.078952652872574e-05, + "loss": 0.43, + "step": 14674, + "teacher_loss": 0.3581126034259796 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.27461695671081543, + "learning_rate": 2.0787431207245044e-05, + "loss": 0.195, + "step": 14675, + "teacher_loss": 0.18610987067222595 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.4925176501274109, + "learning_rate": 2.0785335753075897e-05, + "loss": 0.2064, + "step": 14676, + "teacher_loss": 0.17457237839698792 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.2951141595840454, + "learning_rate": 2.0783240166266344e-05, + "loss": 0.2292, + "step": 14677, + "teacher_loss": 0.22184959053993225 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.761034369468689, + "learning_rate": 2.0781144446864436e-05, + "loss": 0.2727, + "step": 14678, + "teacher_loss": 0.21839825809001923 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.3799636960029602, + "learning_rate": 2.077904859491822e-05, + "loss": 0.2031, + "step": 14679, + "teacher_loss": 0.18344098329544067 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.5852311849594116, + "learning_rate": 2.0776952610475747e-05, + "loss": 0.34, + "step": 14680, + "teacher_loss": 0.3127070963382721 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.23990550637245178, + "learning_rate": 2.077485649358506e-05, + "loss": 0.2702, + "step": 14681, + "teacher_loss": 0.27358514070510864 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.3969283699989319, + "learning_rate": 2.0772760244294242e-05, + "loss": 0.2322, + "step": 14682, + "teacher_loss": 0.21390017867088318 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.8457655906677246, + "learning_rate": 2.0770663862651333e-05, + "loss": 0.3171, + "step": 14683, + "teacher_loss": 0.25832873582839966 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.3686928153038025, + "learning_rate": 2.0768567348704402e-05, + "loss": 0.3334, + "step": 14684, + "teacher_loss": 0.32942837476730347 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.6739547252655029, + "learning_rate": 2.076647070250152e-05, + "loss": 0.2527, + "step": 14685, + "teacher_loss": 0.20592814683914185 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.26532652974128723, + "learning_rate": 2.076437392409075e-05, + "loss": 0.2427, + "step": 14686, + "teacher_loss": 0.2401755303144455 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.6995213031768799, + "learning_rate": 2.0762277013520167e-05, + "loss": 0.2633, + "step": 14687, + "teacher_loss": 0.214847132563591 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.4038154184818268, + "learning_rate": 2.0760179970837855e-05, + "loss": 0.231, + "step": 14688, + "teacher_loss": 0.21183782815933228 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.47509586811065674, + "learning_rate": 2.075808279609188e-05, + "loss": 0.2801, + "step": 14689, + "teacher_loss": 0.25839611887931824 + }, + { + "compression_loss": 0.0, + "epoch": 2.65, + "label_loss": 0.7341402173042297, + "learning_rate": 2.0755985489330332e-05, + "loss": 0.3073, + "step": 14690, + "teacher_loss": 0.2598535120487213 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.4638305604457855, + "learning_rate": 2.0753888050601296e-05, + "loss": 0.2199, + "step": 14691, + "teacher_loss": 0.1927635669708252 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.23588450253009796, + "learning_rate": 2.0751790479952855e-05, + "loss": 0.192, + "step": 14692, + "teacher_loss": 0.1870940923690796 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.5333236455917358, + "learning_rate": 2.0749692777433108e-05, + "loss": 0.3326, + "step": 14693, + "teacher_loss": 0.31025242805480957 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.7882859706878662, + "learning_rate": 2.0747594943090137e-05, + "loss": 0.6274, + "step": 14694, + "teacher_loss": 0.6095231175422668 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.37170228362083435, + "learning_rate": 2.074549697697205e-05, + "loss": 0.33, + "step": 14695, + "teacher_loss": 0.32532545924186707 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.16137054562568665, + "learning_rate": 2.074339887912694e-05, + "loss": 0.2005, + "step": 14696, + "teacher_loss": 0.20480555295944214 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.6075645685195923, + "learning_rate": 2.0741300649602917e-05, + "loss": 0.258, + "step": 14697, + "teacher_loss": 0.21915292739868164 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.31483930349349976, + "learning_rate": 2.0739202288448087e-05, + "loss": 0.2131, + "step": 14698, + "teacher_loss": 0.20175063610076904 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.31544730067253113, + "learning_rate": 2.0737103795710547e-05, + "loss": 0.1877, + "step": 14699, + "teacher_loss": 0.17353597283363342 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.3023248314857483, + "learning_rate": 2.0735005171438426e-05, + "loss": 0.3303, + "step": 14700, + "teacher_loss": 0.3333730399608612 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.9221238493919373, + "learning_rate": 2.073290641567983e-05, + "loss": 0.3116, + "step": 14701, + "teacher_loss": 0.2437857687473297 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.45813173055648804, + "learning_rate": 2.073080752848287e-05, + "loss": 0.3086, + "step": 14702, + "teacher_loss": 0.2920103073120117 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.33743762969970703, + "learning_rate": 2.0728708509895683e-05, + "loss": 0.2028, + "step": 14703, + "teacher_loss": 0.18779322504997253 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.3800710141658783, + "learning_rate": 2.072660935996638e-05, + "loss": 0.3146, + "step": 14704, + "teacher_loss": 0.3072799742221832 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.4884878993034363, + "learning_rate": 2.07245100787431e-05, + "loss": 0.1963, + "step": 14705, + "teacher_loss": 0.16387403011322021 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.4497961401939392, + "learning_rate": 2.0722410666273962e-05, + "loss": 0.2161, + "step": 14706, + "teacher_loss": 0.19016006588935852 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.30216801166534424, + "learning_rate": 2.07203111226071e-05, + "loss": 0.2682, + "step": 14707, + "teacher_loss": 0.26438117027282715 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.15227991342544556, + "learning_rate": 2.071821144779066e-05, + "loss": 0.1403, + "step": 14708, + "teacher_loss": 0.1389472782611847 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.28978532552719116, + "learning_rate": 2.071611164187278e-05, + "loss": 0.2554, + "step": 14709, + "teacher_loss": 0.25162842869758606 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.3722158968448639, + "learning_rate": 2.071401170490159e-05, + "loss": 0.1713, + "step": 14710, + "teacher_loss": 0.1490097939968109 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.30517736077308655, + "learning_rate": 2.0711911636925246e-05, + "loss": 0.1626, + "step": 14711, + "teacher_loss": 0.14673930406570435 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.42546653747558594, + "learning_rate": 2.0709811437991894e-05, + "loss": 0.236, + "step": 14712, + "teacher_loss": 0.21493341028690338 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.3332485258579254, + "learning_rate": 2.0707711108149683e-05, + "loss": 0.2306, + "step": 14713, + "teacher_loss": 0.21918663382530212 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.341885507106781, + "learning_rate": 2.070561064744677e-05, + "loss": 0.1602, + "step": 14714, + "teacher_loss": 0.14002607762813568 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.5701409578323364, + "learning_rate": 2.070351005593131e-05, + "loss": 0.4271, + "step": 14715, + "teacher_loss": 0.41115817427635193 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.6645904779434204, + "learning_rate": 2.0701409333651468e-05, + "loss": 0.3465, + "step": 14716, + "teacher_loss": 0.3112054765224457 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.8569662570953369, + "learning_rate": 2.0699308480655397e-05, + "loss": 0.3761, + "step": 14717, + "teacher_loss": 0.3227236866950989 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.49947136640548706, + "learning_rate": 2.0697207496991277e-05, + "loss": 0.2813, + "step": 14718, + "teacher_loss": 0.25710728764533997 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.5880289673805237, + "learning_rate": 2.0695106382707267e-05, + "loss": 0.2577, + "step": 14719, + "teacher_loss": 0.22094978392124176 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.18167561292648315, + "learning_rate": 2.0693005137851543e-05, + "loss": 0.1722, + "step": 14720, + "teacher_loss": 0.1711582988500595 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.38657066226005554, + "learning_rate": 2.069090376247228e-05, + "loss": 0.2095, + "step": 14721, + "teacher_loss": 0.18982061743736267 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.5976470708847046, + "learning_rate": 2.068880225661765e-05, + "loss": 0.4819, + "step": 14722, + "teacher_loss": 0.469077467918396 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.5527717471122742, + "learning_rate": 2.0686700620335854e-05, + "loss": 0.2502, + "step": 14723, + "teacher_loss": 0.21663561463356018 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.15773695707321167, + "learning_rate": 2.068459885367505e-05, + "loss": 0.1825, + "step": 14724, + "teacher_loss": 0.18526363372802734 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.5297321081161499, + "learning_rate": 2.0682496956683442e-05, + "loss": 0.1848, + "step": 14725, + "teacher_loss": 0.146424800157547 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.602410078048706, + "learning_rate": 2.0680394929409215e-05, + "loss": 0.288, + "step": 14726, + "teacher_loss": 0.25306910276412964 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.44356757402420044, + "learning_rate": 2.0678292771900565e-05, + "loss": 0.2606, + "step": 14727, + "teacher_loss": 0.24022988975048065 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.6300420761108398, + "learning_rate": 2.0676190484205686e-05, + "loss": 0.2512, + "step": 14728, + "teacher_loss": 0.20909321308135986 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.5623605251312256, + "learning_rate": 2.0674088066372773e-05, + "loss": 0.2513, + "step": 14729, + "teacher_loss": 0.21677166223526 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.32679712772369385, + "learning_rate": 2.0671985518450036e-05, + "loss": 0.317, + "step": 14730, + "teacher_loss": 0.31596440076828003 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.5877115726470947, + "learning_rate": 2.0669882840485678e-05, + "loss": 0.4483, + "step": 14731, + "teacher_loss": 0.4328177273273468 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.406552255153656, + "learning_rate": 2.06677800325279e-05, + "loss": 0.2916, + "step": 14732, + "teacher_loss": 0.2788045406341553 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.7280393838882446, + "learning_rate": 2.0665677094624925e-05, + "loss": 0.3261, + "step": 14733, + "teacher_loss": 0.2814173102378845 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.22949644923210144, + "learning_rate": 2.0663574026824956e-05, + "loss": 0.2826, + "step": 14734, + "teacher_loss": 0.28846633434295654 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.22902411222457886, + "learning_rate": 2.0661470829176215e-05, + "loss": 0.1874, + "step": 14735, + "teacher_loss": 0.18280129134655 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.40011727809906006, + "learning_rate": 2.0659367501726926e-05, + "loss": 0.2172, + "step": 14736, + "teacher_loss": 0.19682228565216064 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.3148219883441925, + "learning_rate": 2.0657264044525304e-05, + "loss": 0.26, + "step": 14737, + "teacher_loss": 0.2539372742176056 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.3955051004886627, + "learning_rate": 2.065516045761958e-05, + "loss": 0.1598, + "step": 14738, + "teacher_loss": 0.13363346457481384 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 1.0723299980163574, + "learning_rate": 2.0653056741057986e-05, + "loss": 0.6933, + "step": 14739, + "teacher_loss": 0.6512176990509033 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.45492395758628845, + "learning_rate": 2.065095289488874e-05, + "loss": 0.2737, + "step": 14740, + "teacher_loss": 0.2535954713821411 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.4235838055610657, + "learning_rate": 2.0648848919160096e-05, + "loss": 0.1998, + "step": 14741, + "teacher_loss": 0.1749129444360733 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.36798763275146484, + "learning_rate": 2.0646744813920278e-05, + "loss": 0.2218, + "step": 14742, + "teacher_loss": 0.20555952191352844 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.40499061346054077, + "learning_rate": 2.0644640579217536e-05, + "loss": 0.2394, + "step": 14743, + "teacher_loss": 0.2209864854812622 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.5199931263923645, + "learning_rate": 2.06425362151001e-05, + "loss": 0.3907, + "step": 14744, + "teacher_loss": 0.376347154378891 + }, + { + "compression_loss": 0.0, + "epoch": 2.66, + "label_loss": 0.268401175737381, + "learning_rate": 2.0640431721616233e-05, + "loss": 0.1944, + "step": 14745, + "teacher_loss": 0.1861613541841507 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.30936434864997864, + "learning_rate": 2.0638327098814175e-05, + "loss": 0.2118, + "step": 14746, + "teacher_loss": 0.2009427547454834 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 1.0602633953094482, + "learning_rate": 2.0636222346742184e-05, + "loss": 0.3706, + "step": 14747, + "teacher_loss": 0.29398536682128906 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.8793731927871704, + "learning_rate": 2.0634117465448507e-05, + "loss": 0.3002, + "step": 14748, + "teacher_loss": 0.23583374917507172 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.2738930583000183, + "learning_rate": 2.063201245498141e-05, + "loss": 0.2426, + "step": 14749, + "teacher_loss": 0.23912805318832397 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 1.2484601736068726, + "learning_rate": 2.0629907315389157e-05, + "loss": 0.336, + "step": 14750, + "teacher_loss": 0.23462095856666565 + }, + { + "epoch": 2.67, + "eval_exact_match": 79.59318826868495, + "eval_f1": 87.144408385029, + "step": 14750 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.6365036964416504, + "learning_rate": 2.0627802046720008e-05, + "loss": 0.2622, + "step": 14751, + "teacher_loss": 0.2206268608570099 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.5054407119750977, + "learning_rate": 2.0625696649022225e-05, + "loss": 0.2488, + "step": 14752, + "teacher_loss": 0.22033405303955078 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.4557850956916809, + "learning_rate": 2.0623591122344093e-05, + "loss": 0.2012, + "step": 14753, + "teacher_loss": 0.17294421792030334 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.6534373760223389, + "learning_rate": 2.0621485466733875e-05, + "loss": 0.3412, + "step": 14754, + "teacher_loss": 0.3064731955528259 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.10340330749750137, + "learning_rate": 2.0619379682239845e-05, + "loss": 0.2099, + "step": 14755, + "teacher_loss": 0.22174470126628876 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.1720178872346878, + "learning_rate": 2.0617273768910288e-05, + "loss": 0.1669, + "step": 14756, + "teacher_loss": 0.1663341224193573 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.7388808727264404, + "learning_rate": 2.0615167726793485e-05, + "loss": 0.2578, + "step": 14757, + "teacher_loss": 0.20435532927513123 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.18003569543361664, + "learning_rate": 2.061306155593773e-05, + "loss": 0.1983, + "step": 14758, + "teacher_loss": 0.2003352791070938 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.42469677329063416, + "learning_rate": 2.061095525639129e-05, + "loss": 0.1962, + "step": 14759, + "teacher_loss": 0.17084118723869324 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.2845797836780548, + "learning_rate": 2.060884882820247e-05, + "loss": 0.2396, + "step": 14760, + "teacher_loss": 0.234589621424675 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.28279832005500793, + "learning_rate": 2.060674227141957e-05, + "loss": 0.2059, + "step": 14761, + "teacher_loss": 0.19737903773784637 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.6885349154472351, + "learning_rate": 2.0604635586090873e-05, + "loss": 0.3231, + "step": 14762, + "teacher_loss": 0.28254565596580505 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.6126066446304321, + "learning_rate": 2.060252877226469e-05, + "loss": 0.3972, + "step": 14763, + "teacher_loss": 0.3732442855834961 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.8504101037979126, + "learning_rate": 2.0600421829989314e-05, + "loss": 0.3734, + "step": 14764, + "teacher_loss": 0.3203462064266205 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.5746818780899048, + "learning_rate": 2.059831475931306e-05, + "loss": 0.3413, + "step": 14765, + "teacher_loss": 0.3153492212295532 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.5643368363380432, + "learning_rate": 2.0596207560284238e-05, + "loss": 0.2862, + "step": 14766, + "teacher_loss": 0.25526243448257446 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.5080165863037109, + "learning_rate": 2.0594100232951147e-05, + "loss": 0.2295, + "step": 14767, + "teacher_loss": 0.19854536652565002 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.3373258709907532, + "learning_rate": 2.0591992777362112e-05, + "loss": 0.2633, + "step": 14768, + "teacher_loss": 0.25505757331848145 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.087464340031147, + "learning_rate": 2.058988519356545e-05, + "loss": 0.149, + "step": 14769, + "teacher_loss": 0.15580469369888306 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.2055964320898056, + "learning_rate": 2.0587777481609476e-05, + "loss": 0.1789, + "step": 14770, + "teacher_loss": 0.1759772151708603 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.38859981298446655, + "learning_rate": 2.058566964154252e-05, + "loss": 0.2961, + "step": 14771, + "teacher_loss": 0.2858433127403259 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.23692506551742554, + "learning_rate": 2.0583561673412908e-05, + "loss": 0.312, + "step": 14772, + "teacher_loss": 0.3203795552253723 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.568581223487854, + "learning_rate": 2.0581453577268967e-05, + "loss": 0.3331, + "step": 14773, + "teacher_loss": 0.3069083094596863 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.6714084148406982, + "learning_rate": 2.057934535315902e-05, + "loss": 0.3505, + "step": 14774, + "teacher_loss": 0.31489598751068115 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.16717246174812317, + "learning_rate": 2.0577237001131424e-05, + "loss": 0.2964, + "step": 14775, + "teacher_loss": 0.310787558555603 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.4580898880958557, + "learning_rate": 2.05751285212345e-05, + "loss": 0.2646, + "step": 14776, + "teacher_loss": 0.24304884672164917 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.451576828956604, + "learning_rate": 2.0573019913516597e-05, + "loss": 0.2937, + "step": 14777, + "teacher_loss": 0.2761993110179901 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.16200639307498932, + "learning_rate": 2.0570911178026054e-05, + "loss": 0.133, + "step": 14778, + "teacher_loss": 0.1297340989112854 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.3857666254043579, + "learning_rate": 2.0568802314811214e-05, + "loss": 0.2469, + "step": 14779, + "teacher_loss": 0.23148225247859955 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.4879947900772095, + "learning_rate": 2.0566693323920444e-05, + "loss": 0.2986, + "step": 14780, + "teacher_loss": 0.2775239944458008 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.8031771183013916, + "learning_rate": 2.0564584205402077e-05, + "loss": 0.5503, + "step": 14781, + "teacher_loss": 0.5221806764602661 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.2689746618270874, + "learning_rate": 2.056247495930448e-05, + "loss": 0.1917, + "step": 14782, + "teacher_loss": 0.1830991506576538 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.6977099180221558, + "learning_rate": 2.0560365585676017e-05, + "loss": 0.3911, + "step": 14783, + "teacher_loss": 0.35707300901412964 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.5370658040046692, + "learning_rate": 2.055825608456503e-05, + "loss": 0.274, + "step": 14784, + "teacher_loss": 0.24477878212928772 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.3308143615722656, + "learning_rate": 2.05561464560199e-05, + "loss": 0.2497, + "step": 14785, + "teacher_loss": 0.2407037913799286 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.5128515958786011, + "learning_rate": 2.0554036700088996e-05, + "loss": 0.2721, + "step": 14786, + "teacher_loss": 0.24531877040863037 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.6130527257919312, + "learning_rate": 2.0551926816820675e-05, + "loss": 0.3641, + "step": 14787, + "teacher_loss": 0.3364737629890442 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.2753695547580719, + "learning_rate": 2.0549816806263322e-05, + "loss": 0.2395, + "step": 14788, + "teacher_loss": 0.23546728491783142 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.2698274254798889, + "learning_rate": 2.0547706668465306e-05, + "loss": 0.2708, + "step": 14789, + "teacher_loss": 0.27089452743530273 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.5168047547340393, + "learning_rate": 2.0545596403475008e-05, + "loss": 0.4558, + "step": 14790, + "teacher_loss": 0.4490697979927063 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.5163775682449341, + "learning_rate": 2.0543486011340815e-05, + "loss": 0.33, + "step": 14791, + "teacher_loss": 0.30933457612991333 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.34799379110336304, + "learning_rate": 2.0541375492111107e-05, + "loss": 0.2287, + "step": 14792, + "teacher_loss": 0.21542897820472717 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.3095616102218628, + "learning_rate": 2.053926484583427e-05, + "loss": 0.2214, + "step": 14793, + "teacher_loss": 0.21160462498664856 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.7041926980018616, + "learning_rate": 2.05371540725587e-05, + "loss": 0.322, + "step": 14794, + "teacher_loss": 0.27954310178756714 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.294477641582489, + "learning_rate": 2.0535043172332787e-05, + "loss": 0.3611, + "step": 14795, + "teacher_loss": 0.3685116171836853 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.1934657096862793, + "learning_rate": 2.0532932145204932e-05, + "loss": 0.2109, + "step": 14796, + "teacher_loss": 0.21281927824020386 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.2769962549209595, + "learning_rate": 2.053082099122353e-05, + "loss": 0.37, + "step": 14797, + "teacher_loss": 0.38036006689071655 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.669194757938385, + "learning_rate": 2.0528709710436982e-05, + "loss": 0.2887, + "step": 14798, + "teacher_loss": 0.2464020550251007 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.8606622219085693, + "learning_rate": 2.05265983028937e-05, + "loss": 0.318, + "step": 14799, + "teacher_loss": 0.2577003240585327 + }, + { + "compression_loss": 0.0, + "epoch": 2.67, + "label_loss": 0.5190480947494507, + "learning_rate": 2.0524486768642086e-05, + "loss": 0.2916, + "step": 14800, + "teacher_loss": 0.2663763761520386 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.34998780488967896, + "learning_rate": 2.0522375107730556e-05, + "loss": 0.2638, + "step": 14801, + "teacher_loss": 0.2541815936565399 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.4187970757484436, + "learning_rate": 2.052026332020752e-05, + "loss": 0.2141, + "step": 14802, + "teacher_loss": 0.19130240380764008 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.845827043056488, + "learning_rate": 2.05181514061214e-05, + "loss": 0.2638, + "step": 14803, + "teacher_loss": 0.1991061568260193 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.3965373635292053, + "learning_rate": 2.0516039365520607e-05, + "loss": 0.2277, + "step": 14804, + "teacher_loss": 0.20899033546447754 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.32548409700393677, + "learning_rate": 2.0513927198453573e-05, + "loss": 0.2778, + "step": 14805, + "teacher_loss": 0.27255067229270935 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.4849543571472168, + "learning_rate": 2.0511814904968717e-05, + "loss": 0.1671, + "step": 14806, + "teacher_loss": 0.13182950019836426 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.4429827332496643, + "learning_rate": 2.050970248511447e-05, + "loss": 0.2927, + "step": 14807, + "teacher_loss": 0.27595388889312744 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.40011781454086304, + "learning_rate": 2.050758993893927e-05, + "loss": 0.2157, + "step": 14808, + "teacher_loss": 0.1952388882637024 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.5186194181442261, + "learning_rate": 2.050547726649154e-05, + "loss": 0.3738, + "step": 14809, + "teacher_loss": 0.35766738653182983 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.569491446018219, + "learning_rate": 2.0503364467819725e-05, + "loss": 0.2493, + "step": 14810, + "teacher_loss": 0.21372094750404358 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.33216992020606995, + "learning_rate": 2.0501251542972262e-05, + "loss": 0.2538, + "step": 14811, + "teacher_loss": 0.24510881304740906 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.4135696589946747, + "learning_rate": 2.0499138491997592e-05, + "loss": 0.2934, + "step": 14812, + "teacher_loss": 0.28000539541244507 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.42928725481033325, + "learning_rate": 2.049702531494417e-05, + "loss": 0.258, + "step": 14813, + "teacher_loss": 0.2389732450246811 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.5191138982772827, + "learning_rate": 2.0494912011860435e-05, + "loss": 0.258, + "step": 14814, + "teacher_loss": 0.22902607917785645 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.31187760829925537, + "learning_rate": 2.049279858279484e-05, + "loss": 0.2877, + "step": 14815, + "teacher_loss": 0.28500157594680786 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.23061445355415344, + "learning_rate": 2.0490685027795843e-05, + "loss": 0.254, + "step": 14816, + "teacher_loss": 0.2566325068473816 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.3891162872314453, + "learning_rate": 2.04885713469119e-05, + "loss": 0.2109, + "step": 14817, + "teacher_loss": 0.1911163032054901 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.24955663084983826, + "learning_rate": 2.0486457540191474e-05, + "loss": 0.1846, + "step": 14818, + "teacher_loss": 0.17732734978199005 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.2184136062860489, + "learning_rate": 2.0484343607683026e-05, + "loss": 0.209, + "step": 14819, + "teacher_loss": 0.20798182487487793 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.4304956793785095, + "learning_rate": 2.0482229549435017e-05, + "loss": 0.3369, + "step": 14820, + "teacher_loss": 0.3264923691749573 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.6395919322967529, + "learning_rate": 2.0480115365495928e-05, + "loss": 0.4184, + "step": 14821, + "teacher_loss": 0.39378637075424194 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.44393083453178406, + "learning_rate": 2.047800105591422e-05, + "loss": 0.2814, + "step": 14822, + "teacher_loss": 0.26332634687423706 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.1826765239238739, + "learning_rate": 2.047588662073837e-05, + "loss": 0.2174, + "step": 14823, + "teacher_loss": 0.22122052311897278 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.18634039163589478, + "learning_rate": 2.0473772060016862e-05, + "loss": 0.2608, + "step": 14824, + "teacher_loss": 0.2690912187099457 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.4635426104068756, + "learning_rate": 2.047165737379817e-05, + "loss": 0.2333, + "step": 14825, + "teacher_loss": 0.2077116072177887 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.5371942520141602, + "learning_rate": 2.0469542562130775e-05, + "loss": 0.5128, + "step": 14826, + "teacher_loss": 0.5100386142730713 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.2768608629703522, + "learning_rate": 2.046742762506317e-05, + "loss": 0.2259, + "step": 14827, + "teacher_loss": 0.22021490335464478 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.24004840850830078, + "learning_rate": 2.0465312562643846e-05, + "loss": 0.2119, + "step": 14828, + "teacher_loss": 0.20877695083618164 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.8347723484039307, + "learning_rate": 2.0463197374921288e-05, + "loss": 0.5108, + "step": 14829, + "teacher_loss": 0.4747787117958069 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.3548860549926758, + "learning_rate": 2.046108206194399e-05, + "loss": 0.2471, + "step": 14830, + "teacher_loss": 0.23514020442962646 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.4827243685722351, + "learning_rate": 2.045896662376046e-05, + "loss": 0.2524, + "step": 14831, + "teacher_loss": 0.22675958275794983 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.7522687911987305, + "learning_rate": 2.045685106041919e-05, + "loss": 0.3689, + "step": 14832, + "teacher_loss": 0.32631945610046387 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.6438785195350647, + "learning_rate": 2.0454735371968683e-05, + "loss": 0.2849, + "step": 14833, + "teacher_loss": 0.24505344033241272 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.8225363492965698, + "learning_rate": 2.0452619558457448e-05, + "loss": 0.2528, + "step": 14834, + "teacher_loss": 0.18947236239910126 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.2629261612892151, + "learning_rate": 2.0450503619933997e-05, + "loss": 0.2314, + "step": 14835, + "teacher_loss": 0.22791366279125214 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.25196897983551025, + "learning_rate": 2.044838755644684e-05, + "loss": 0.2562, + "step": 14836, + "teacher_loss": 0.25667887926101685 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.14289674162864685, + "learning_rate": 2.0446271368044484e-05, + "loss": 0.1845, + "step": 14837, + "teacher_loss": 0.1891317069530487 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.12363754212856293, + "learning_rate": 2.0444155054775463e-05, + "loss": 0.1938, + "step": 14838, + "teacher_loss": 0.2016124129295349 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.3097110092639923, + "learning_rate": 2.044203861668829e-05, + "loss": 0.3324, + "step": 14839, + "teacher_loss": 0.33490562438964844 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.524707555770874, + "learning_rate": 2.0439922053831482e-05, + "loss": 0.3185, + "step": 14840, + "teacher_loss": 0.2956264019012451 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.6648913621902466, + "learning_rate": 2.0437805366253573e-05, + "loss": 0.2934, + "step": 14841, + "teacher_loss": 0.25209522247314453 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 1.0346568822860718, + "learning_rate": 2.043568855400309e-05, + "loss": 0.3442, + "step": 14842, + "teacher_loss": 0.26744967699050903 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.5004614591598511, + "learning_rate": 2.0433571617128565e-05, + "loss": 0.2421, + "step": 14843, + "teacher_loss": 0.21341001987457275 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.22472357749938965, + "learning_rate": 2.0431454555678536e-05, + "loss": 0.2311, + "step": 14844, + "teacher_loss": 0.23185688257217407 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.4382818341255188, + "learning_rate": 2.0429337369701535e-05, + "loss": 0.3341, + "step": 14845, + "teacher_loss": 0.32250866293907166 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.32862529158592224, + "learning_rate": 2.042722005924611e-05, + "loss": 0.1858, + "step": 14846, + "teacher_loss": 0.16997158527374268 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.43893536925315857, + "learning_rate": 2.0425102624360797e-05, + "loss": 0.3055, + "step": 14847, + "teacher_loss": 0.29072582721710205 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.5672550797462463, + "learning_rate": 2.0422985065094146e-05, + "loss": 0.2316, + "step": 14848, + "teacher_loss": 0.19433942437171936 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.27996551990509033, + "learning_rate": 2.042086738149471e-05, + "loss": 0.1841, + "step": 14849, + "teacher_loss": 0.17339572310447693 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.9047365188598633, + "learning_rate": 2.0418749573611033e-05, + "loss": 0.4897, + "step": 14850, + "teacher_loss": 0.44353121519088745 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.533079981803894, + "learning_rate": 2.041663164149168e-05, + "loss": 0.2438, + "step": 14851, + "teacher_loss": 0.21162816882133484 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 1.0755133628845215, + "learning_rate": 2.04145135851852e-05, + "loss": 0.3964, + "step": 14852, + "teacher_loss": 0.3209264278411865 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.7049232721328735, + "learning_rate": 2.0412395404740156e-05, + "loss": 0.2866, + "step": 14853, + "teacher_loss": 0.24007275700569153 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.5834645628929138, + "learning_rate": 2.0410277100205116e-05, + "loss": 0.3281, + "step": 14854, + "teacher_loss": 0.2997483015060425 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.6641458868980408, + "learning_rate": 2.040815867162864e-05, + "loss": 0.4216, + "step": 14855, + "teacher_loss": 0.3946259319782257 + }, + { + "compression_loss": 0.0, + "epoch": 2.68, + "label_loss": 0.32788607478141785, + "learning_rate": 2.04060401190593e-05, + "loss": 0.2071, + "step": 14856, + "teacher_loss": 0.1936974823474884 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.2416599839925766, + "learning_rate": 2.040392144254567e-05, + "loss": 0.2604, + "step": 14857, + "teacher_loss": 0.26249971985816956 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.5173584222793579, + "learning_rate": 2.040180264213632e-05, + "loss": 0.3391, + "step": 14858, + "teacher_loss": 0.3193283975124359 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.3677915930747986, + "learning_rate": 2.0399683717879837e-05, + "loss": 0.2568, + "step": 14859, + "teacher_loss": 0.24450330436229706 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.22522315382957458, + "learning_rate": 2.0397564669824792e-05, + "loss": 0.2287, + "step": 14860, + "teacher_loss": 0.22910797595977783 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.5012238025665283, + "learning_rate": 2.039544549801977e-05, + "loss": 0.2218, + "step": 14861, + "teacher_loss": 0.19079944491386414 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.3694916367530823, + "learning_rate": 2.039332620251336e-05, + "loss": 0.2039, + "step": 14862, + "teacher_loss": 0.18554240465164185 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.22652840614318848, + "learning_rate": 2.0391206783354154e-05, + "loss": 0.2142, + "step": 14863, + "teacher_loss": 0.21282660961151123 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.6911014318466187, + "learning_rate": 2.0389087240590736e-05, + "loss": 0.3094, + "step": 14864, + "teacher_loss": 0.26693761348724365 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.2800799310207367, + "learning_rate": 2.0386967574271705e-05, + "loss": 0.2336, + "step": 14865, + "teacher_loss": 0.2284429520368576 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.37927111983299255, + "learning_rate": 2.038484778444566e-05, + "loss": 0.2709, + "step": 14866, + "teacher_loss": 0.25888872146606445 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.5265012383460999, + "learning_rate": 2.0382727871161197e-05, + "loss": 0.4306, + "step": 14867, + "teacher_loss": 0.4199068546295166 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.6282833814620972, + "learning_rate": 2.0380607834466927e-05, + "loss": 0.2349, + "step": 14868, + "teacher_loss": 0.19116206467151642 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.509934663772583, + "learning_rate": 2.0378487674411447e-05, + "loss": 0.2903, + "step": 14869, + "teacher_loss": 0.265918493270874 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.6388763189315796, + "learning_rate": 2.0376367391043375e-05, + "loss": 0.2254, + "step": 14870, + "teacher_loss": 0.17950831353664398 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 1.0789724588394165, + "learning_rate": 2.0374246984411315e-05, + "loss": 0.4355, + "step": 14871, + "teacher_loss": 0.36395561695098877 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.5592183470726013, + "learning_rate": 2.0372126454563882e-05, + "loss": 0.2987, + "step": 14872, + "teacher_loss": 0.2697659134864807 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.35236307978630066, + "learning_rate": 2.03700058015497e-05, + "loss": 0.2188, + "step": 14873, + "teacher_loss": 0.20399588346481323 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.31600648164749146, + "learning_rate": 2.036788502541738e-05, + "loss": 0.269, + "step": 14874, + "teacher_loss": 0.2637813687324524 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.48180603981018066, + "learning_rate": 2.0365764126215555e-05, + "loss": 0.219, + "step": 14875, + "teacher_loss": 0.18974421918392181 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.5358086824417114, + "learning_rate": 2.0363643103992847e-05, + "loss": 0.3445, + "step": 14876, + "teacher_loss": 0.32321232557296753 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.5469584465026855, + "learning_rate": 2.036152195879788e-05, + "loss": 0.3533, + "step": 14877, + "teacher_loss": 0.33181384205818176 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.42526283860206604, + "learning_rate": 2.0359400690679288e-05, + "loss": 0.2364, + "step": 14878, + "teacher_loss": 0.21537292003631592 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.5540854930877686, + "learning_rate": 2.035727929968571e-05, + "loss": 0.2367, + "step": 14879, + "teacher_loss": 0.2014390528202057 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.5503325462341309, + "learning_rate": 2.0355157785865778e-05, + "loss": 0.2183, + "step": 14880, + "teacher_loss": 0.1814216673374176 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.504286527633667, + "learning_rate": 2.0353036149268135e-05, + "loss": 0.2499, + "step": 14881, + "teacher_loss": 0.22167330980300903 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.8380905389785767, + "learning_rate": 2.0350914389941423e-05, + "loss": 0.3318, + "step": 14882, + "teacher_loss": 0.2755822241306305 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.4582725763320923, + "learning_rate": 2.0348792507934283e-05, + "loss": 0.2939, + "step": 14883, + "teacher_loss": 0.2755844295024872 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.6674776077270508, + "learning_rate": 2.034667050329537e-05, + "loss": 0.2184, + "step": 14884, + "teacher_loss": 0.16845136880874634 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.27727022767066956, + "learning_rate": 2.0344548376073328e-05, + "loss": 0.241, + "step": 14885, + "teacher_loss": 0.23698115348815918 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.2868749797344208, + "learning_rate": 2.0342426126316825e-05, + "loss": 0.2382, + "step": 14886, + "teacher_loss": 0.23276180028915405 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.522817850112915, + "learning_rate": 2.03403037540745e-05, + "loss": 0.2846, + "step": 14887, + "teacher_loss": 0.2581116557121277 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.5375682711601257, + "learning_rate": 2.0338181259395023e-05, + "loss": 0.2082, + "step": 14888, + "teacher_loss": 0.17165547609329224 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.6137347221374512, + "learning_rate": 2.0336058642327058e-05, + "loss": 0.3141, + "step": 14889, + "teacher_loss": 0.28084874153137207 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.5696321725845337, + "learning_rate": 2.0333935902919264e-05, + "loss": 0.3638, + "step": 14890, + "teacher_loss": 0.340931236743927 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.2673691213130951, + "learning_rate": 2.0331813041220312e-05, + "loss": 0.2372, + "step": 14891, + "teacher_loss": 0.2338741570711136 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.4888160228729248, + "learning_rate": 2.032969005727887e-05, + "loss": 0.2351, + "step": 14892, + "teacher_loss": 0.20687642693519592 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.6059926748275757, + "learning_rate": 2.0327566951143615e-05, + "loss": 0.3272, + "step": 14893, + "teacher_loss": 0.2962338328361511 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.24283966422080994, + "learning_rate": 2.0325443722863228e-05, + "loss": 0.2503, + "step": 14894, + "teacher_loss": 0.25113609433174133 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.27584022283554077, + "learning_rate": 2.0323320372486378e-05, + "loss": 0.223, + "step": 14895, + "teacher_loss": 0.21708548069000244 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.17030948400497437, + "learning_rate": 2.0321196900061753e-05, + "loss": 0.1562, + "step": 14896, + "teacher_loss": 0.15462495386600494 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.6236451864242554, + "learning_rate": 2.0319073305638035e-05, + "loss": 0.2856, + "step": 14897, + "teacher_loss": 0.24798667430877686 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.3590516448020935, + "learning_rate": 2.0316949589263917e-05, + "loss": 0.2264, + "step": 14898, + "teacher_loss": 0.21161547303199768 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.2167343944311142, + "learning_rate": 2.031482575098808e-05, + "loss": 0.2339, + "step": 14899, + "teacher_loss": 0.23578590154647827 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.43944716453552246, + "learning_rate": 2.031270179085923e-05, + "loss": 0.2189, + "step": 14900, + "teacher_loss": 0.19440308213233948 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.23635739088058472, + "learning_rate": 2.0310577708926057e-05, + "loss": 0.2483, + "step": 14901, + "teacher_loss": 0.24957653880119324 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.4435775876045227, + "learning_rate": 2.0308453505237252e-05, + "loss": 0.2123, + "step": 14902, + "teacher_loss": 0.18658530712127686 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.6203392148017883, + "learning_rate": 2.030632917984153e-05, + "loss": 0.3012, + "step": 14903, + "teacher_loss": 0.26576539874076843 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.38520002365112305, + "learning_rate": 2.0304204732787585e-05, + "loss": 0.3173, + "step": 14904, + "teacher_loss": 0.3097492456436157 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.39204150438308716, + "learning_rate": 2.0302080164124128e-05, + "loss": 0.1985, + "step": 14905, + "teacher_loss": 0.17703217267990112 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.385428249835968, + "learning_rate": 2.0299955473899876e-05, + "loss": 0.2028, + "step": 14906, + "teacher_loss": 0.18245339393615723 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.3512749671936035, + "learning_rate": 2.029783066216353e-05, + "loss": 0.296, + "step": 14907, + "teacher_loss": 0.28990548849105835 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.21630430221557617, + "learning_rate": 2.0295705728963808e-05, + "loss": 0.2158, + "step": 14908, + "teacher_loss": 0.2157471776008606 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.5027220249176025, + "learning_rate": 2.029358067434944e-05, + "loss": 0.2659, + "step": 14909, + "teacher_loss": 0.2395501434803009 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.179561585187912, + "learning_rate": 2.0291455498369128e-05, + "loss": 0.2769, + "step": 14910, + "teacher_loss": 0.28771156072616577 + }, + { + "compression_loss": 0.0, + "epoch": 2.69, + "label_loss": 0.18068565428256989, + "learning_rate": 2.028933020107161e-05, + "loss": 0.1871, + "step": 14911, + "teacher_loss": 0.18781127035617828 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.4077897071838379, + "learning_rate": 2.028720478250561e-05, + "loss": 0.2073, + "step": 14912, + "teacher_loss": 0.18507859110832214 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.46194833517074585, + "learning_rate": 2.028507924271985e-05, + "loss": 0.2616, + "step": 14913, + "teacher_loss": 0.23930887877941132 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.4916226863861084, + "learning_rate": 2.028295358176308e-05, + "loss": 0.2695, + "step": 14914, + "teacher_loss": 0.24482116103172302 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.36132073402404785, + "learning_rate": 2.0280827799684013e-05, + "loss": 0.2553, + "step": 14915, + "teacher_loss": 0.2434784471988678 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.40071791410446167, + "learning_rate": 2.0278701896531404e-05, + "loss": 0.2575, + "step": 14916, + "teacher_loss": 0.2416183203458786 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.5685251355171204, + "learning_rate": 2.027657587235398e-05, + "loss": 0.2723, + "step": 14917, + "teacher_loss": 0.23935860395431519 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.2976388931274414, + "learning_rate": 2.0274449727200497e-05, + "loss": 0.209, + "step": 14918, + "teacher_loss": 0.19914615154266357 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.4672843813896179, + "learning_rate": 2.0272323461119694e-05, + "loss": 0.2154, + "step": 14919, + "teacher_loss": 0.1874154955148697 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.5073993802070618, + "learning_rate": 2.0270197074160323e-05, + "loss": 0.4834, + "step": 14920, + "teacher_loss": 0.48074042797088623 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 1.1281288862228394, + "learning_rate": 2.0268070566371133e-05, + "loss": 0.6246, + "step": 14921, + "teacher_loss": 0.5686434507369995 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.4203755855560303, + "learning_rate": 2.0265943937800875e-05, + "loss": 0.2661, + "step": 14922, + "teacher_loss": 0.24896439909934998 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.5933854579925537, + "learning_rate": 2.0263817188498313e-05, + "loss": 0.2773, + "step": 14923, + "teacher_loss": 0.24218492209911346 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.4876479506492615, + "learning_rate": 2.0261690318512207e-05, + "loss": 0.3052, + "step": 14924, + "teacher_loss": 0.28497496247291565 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.677702009677887, + "learning_rate": 2.025956332789132e-05, + "loss": 0.2422, + "step": 14925, + "teacher_loss": 0.19386549293994904 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.930736780166626, + "learning_rate": 2.025743621668441e-05, + "loss": 0.3899, + "step": 14926, + "teacher_loss": 0.3298322558403015 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.6612527966499329, + "learning_rate": 2.025530898494025e-05, + "loss": 0.3012, + "step": 14927, + "teacher_loss": 0.2612287998199463 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.8193802237510681, + "learning_rate": 2.025318163270761e-05, + "loss": 0.5153, + "step": 14928, + "teacher_loss": 0.48155510425567627 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.4310070872306824, + "learning_rate": 2.025105416003527e-05, + "loss": 0.2806, + "step": 14929, + "teacher_loss": 0.26393601298332214 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.25112998485565186, + "learning_rate": 2.0248926566971996e-05, + "loss": 0.2119, + "step": 14930, + "teacher_loss": 0.2074863612651825 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.6088855266571045, + "learning_rate": 2.024679885356658e-05, + "loss": 0.2971, + "step": 14931, + "teacher_loss": 0.2624693512916565 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.6737299561500549, + "learning_rate": 2.0244671019867788e-05, + "loss": 0.2164, + "step": 14932, + "teacher_loss": 0.1656334400177002 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.22340169548988342, + "learning_rate": 2.024254306592442e-05, + "loss": 0.1796, + "step": 14933, + "teacher_loss": 0.17475393414497375 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.6424590349197388, + "learning_rate": 2.024041499178526e-05, + "loss": 0.2593, + "step": 14934, + "teacher_loss": 0.21674300730228424 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.2921537160873413, + "learning_rate": 2.0238286797499085e-05, + "loss": 0.2715, + "step": 14935, + "teacher_loss": 0.2692144513130188 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.4621097147464752, + "learning_rate": 2.0236158483114707e-05, + "loss": 0.2368, + "step": 14936, + "teacher_loss": 0.21177539229393005 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.38373756408691406, + "learning_rate": 2.0234030048680913e-05, + "loss": 0.2315, + "step": 14937, + "teacher_loss": 0.21457837522029877 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.3856561779975891, + "learning_rate": 2.0231901494246504e-05, + "loss": 0.2405, + "step": 14938, + "teacher_loss": 0.22442519664764404 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.43048378825187683, + "learning_rate": 2.022977281986028e-05, + "loss": 0.2571, + "step": 14939, + "teacher_loss": 0.23779019713401794 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.5011731386184692, + "learning_rate": 2.022764402557104e-05, + "loss": 0.3095, + "step": 14940, + "teacher_loss": 0.2881562113761902 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.5723336338996887, + "learning_rate": 2.02255151114276e-05, + "loss": 0.4479, + "step": 14941, + "teacher_loss": 0.434123694896698 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.4294903874397278, + "learning_rate": 2.0223386077478766e-05, + "loss": 0.2913, + "step": 14942, + "teacher_loss": 0.2759302854537964 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.6003793478012085, + "learning_rate": 2.0221256923773345e-05, + "loss": 0.2718, + "step": 14943, + "teacher_loss": 0.23534628748893738 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.12675902247428894, + "learning_rate": 2.0219127650360162e-05, + "loss": 0.1277, + "step": 14944, + "teacher_loss": 0.12781323492527008 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.45469433069229126, + "learning_rate": 2.0216998257288024e-05, + "loss": 0.291, + "step": 14945, + "teacher_loss": 0.27282413840293884 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.3445495367050171, + "learning_rate": 2.0214868744605764e-05, + "loss": 0.1886, + "step": 14946, + "teacher_loss": 0.17132286727428436 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.707584023475647, + "learning_rate": 2.0212739112362194e-05, + "loss": 0.4402, + "step": 14947, + "teacher_loss": 0.4104752540588379 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.42357051372528076, + "learning_rate": 2.0210609360606145e-05, + "loss": 0.2966, + "step": 14948, + "teacher_loss": 0.28252023458480835 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 1.0093250274658203, + "learning_rate": 2.0208479489386445e-05, + "loss": 0.2453, + "step": 14949, + "teacher_loss": 0.16037200391292572 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.6762949228286743, + "learning_rate": 2.020634949875193e-05, + "loss": 0.4994, + "step": 14950, + "teacher_loss": 0.47974202036857605 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.5987178683280945, + "learning_rate": 2.020421938875143e-05, + "loss": 0.2842, + "step": 14951, + "teacher_loss": 0.2492598593235016 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.6745460033416748, + "learning_rate": 2.0202089159433777e-05, + "loss": 0.3873, + "step": 14952, + "teacher_loss": 0.35540324449539185 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.2650189697742462, + "learning_rate": 2.0199958810847823e-05, + "loss": 0.2489, + "step": 14953, + "teacher_loss": 0.24714966118335724 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.5582663416862488, + "learning_rate": 2.01978283430424e-05, + "loss": 0.2583, + "step": 14954, + "teacher_loss": 0.22492723166942596 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.1415107250213623, + "learning_rate": 2.019569775606636e-05, + "loss": 0.1824, + "step": 14955, + "teacher_loss": 0.18693727254867554 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.3144680857658386, + "learning_rate": 2.0193567049968543e-05, + "loss": 0.248, + "step": 14956, + "teacher_loss": 0.24056866765022278 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.4739103317260742, + "learning_rate": 2.0191436224797807e-05, + "loss": 0.2721, + "step": 14957, + "teacher_loss": 0.24962449073791504 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.13006094098091125, + "learning_rate": 2.0189305280603005e-05, + "loss": 0.2387, + "step": 14958, + "teacher_loss": 0.2507938742637634 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.6888920068740845, + "learning_rate": 2.018717421743299e-05, + "loss": 0.3612, + "step": 14959, + "teacher_loss": 0.3247529864311218 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.2874410152435303, + "learning_rate": 2.0185043035336617e-05, + "loss": 0.2244, + "step": 14960, + "teacher_loss": 0.21735477447509766 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.11321654915809631, + "learning_rate": 2.018291173436276e-05, + "loss": 0.1598, + "step": 14961, + "teacher_loss": 0.16503086686134338 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.423062801361084, + "learning_rate": 2.0180780314560278e-05, + "loss": 0.2577, + "step": 14962, + "teacher_loss": 0.2392856776714325 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.2879185378551483, + "learning_rate": 2.0178648775978028e-05, + "loss": 0.1797, + "step": 14963, + "teacher_loss": 0.16769476234912872 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.4349994361400604, + "learning_rate": 2.0176517118664893e-05, + "loss": 0.1966, + "step": 14964, + "teacher_loss": 0.170067697763443 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.8133188486099243, + "learning_rate": 2.0174385342669737e-05, + "loss": 0.6554, + "step": 14965, + "teacher_loss": 0.6378690004348755 + }, + { + "compression_loss": 0.0, + "epoch": 2.7, + "label_loss": 0.4016261100769043, + "learning_rate": 2.0172253448041443e-05, + "loss": 0.2626, + "step": 14966, + "teacher_loss": 0.24720095098018646 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.309848427772522, + "learning_rate": 2.0170121434828883e-05, + "loss": 0.232, + "step": 14967, + "teacher_loss": 0.22334083914756775 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.3638608157634735, + "learning_rate": 2.0167989303080935e-05, + "loss": 0.2184, + "step": 14968, + "teacher_loss": 0.20221257209777832 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.70854252576828, + "learning_rate": 2.0165857052846496e-05, + "loss": 0.4609, + "step": 14969, + "teacher_loss": 0.4333451986312866 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.537553608417511, + "learning_rate": 2.0163724684174435e-05, + "loss": 0.3886, + "step": 14970, + "teacher_loss": 0.37209880352020264 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.7901999950408936, + "learning_rate": 2.0161592197113652e-05, + "loss": 0.3719, + "step": 14971, + "teacher_loss": 0.3253900408744812 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.4182116985321045, + "learning_rate": 2.0159459591713036e-05, + "loss": 0.3722, + "step": 14972, + "teacher_loss": 0.3670896887779236 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.4978400468826294, + "learning_rate": 2.0157326868021474e-05, + "loss": 0.2628, + "step": 14973, + "teacher_loss": 0.23664109408855438 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.30138349533081055, + "learning_rate": 2.0155194026087883e-05, + "loss": 0.202, + "step": 14974, + "teacher_loss": 0.19093114137649536 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.2574589252471924, + "learning_rate": 2.0153061065961134e-05, + "loss": 0.2109, + "step": 14975, + "teacher_loss": 0.20575420558452606 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.22126173973083496, + "learning_rate": 2.0150927987690152e-05, + "loss": 0.1673, + "step": 14976, + "teacher_loss": 0.1612926423549652 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.6109637022018433, + "learning_rate": 2.0148794791323834e-05, + "loss": 0.3405, + "step": 14977, + "teacher_loss": 0.3104754388332367 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.6727228164672852, + "learning_rate": 2.0146661476911084e-05, + "loss": 0.3203, + "step": 14978, + "teacher_loss": 0.28110796213150024 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.815883994102478, + "learning_rate": 2.014452804450082e-05, + "loss": 0.3878, + "step": 14979, + "teacher_loss": 0.3402690291404724 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.636858344078064, + "learning_rate": 2.0142394494141958e-05, + "loss": 0.2753, + "step": 14980, + "teacher_loss": 0.23514223098754883 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.3174043893814087, + "learning_rate": 2.01402608258834e-05, + "loss": 0.2627, + "step": 14981, + "teacher_loss": 0.25658273696899414 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.5861826539039612, + "learning_rate": 2.0138127039774075e-05, + "loss": 0.2581, + "step": 14982, + "teacher_loss": 0.22161316871643066 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.7548277378082275, + "learning_rate": 2.01359931358629e-05, + "loss": 0.4608, + "step": 14983, + "teacher_loss": 0.42816829681396484 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.8381767272949219, + "learning_rate": 2.0133859114198803e-05, + "loss": 0.435, + "step": 14984, + "teacher_loss": 0.3902357220649719 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.45744454860687256, + "learning_rate": 2.0131724974830708e-05, + "loss": 0.2484, + "step": 14985, + "teacher_loss": 0.2251608669757843 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.7651112079620361, + "learning_rate": 2.0129590717807545e-05, + "loss": 0.3111, + "step": 14986, + "teacher_loss": 0.26064351201057434 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.2947438359260559, + "learning_rate": 2.0127456343178248e-05, + "loss": 0.2453, + "step": 14987, + "teacher_loss": 0.23976153135299683 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.34521710872650146, + "learning_rate": 2.0125321850991748e-05, + "loss": 0.1885, + "step": 14988, + "teacher_loss": 0.17104095220565796 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.35951903462409973, + "learning_rate": 2.0123187241296988e-05, + "loss": 0.2669, + "step": 14989, + "teacher_loss": 0.2566111981868744 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.3678209185600281, + "learning_rate": 2.01210525141429e-05, + "loss": 0.2, + "step": 14990, + "teacher_loss": 0.18132254481315613 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.7792915105819702, + "learning_rate": 2.011891766957843e-05, + "loss": 0.2716, + "step": 14991, + "teacher_loss": 0.2152017056941986 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.7213821411132812, + "learning_rate": 2.011678270765253e-05, + "loss": 0.3212, + "step": 14992, + "teacher_loss": 0.2767312824726105 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.42048174142837524, + "learning_rate": 2.011464762841414e-05, + "loss": 0.295, + "step": 14993, + "teacher_loss": 0.28108155727386475 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.5271192789077759, + "learning_rate": 2.011251243191222e-05, + "loss": 0.2411, + "step": 14994, + "teacher_loss": 0.20933997631072998 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.33664506673812866, + "learning_rate": 2.0110377118195713e-05, + "loss": 0.2295, + "step": 14995, + "teacher_loss": 0.21758291125297546 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.39082062244415283, + "learning_rate": 2.010824168731358e-05, + "loss": 0.315, + "step": 14996, + "teacher_loss": 0.3065846860408783 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.30067944526672363, + "learning_rate": 2.0106106139314782e-05, + "loss": 0.2156, + "step": 14997, + "teacher_loss": 0.20610946416854858 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.8333945274353027, + "learning_rate": 2.010397047424828e-05, + "loss": 0.4313, + "step": 14998, + "teacher_loss": 0.3866270184516907 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.5261723399162292, + "learning_rate": 2.010183469216304e-05, + "loss": 0.3401, + "step": 14999, + "teacher_loss": 0.31944113969802856 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.15280258655548096, + "learning_rate": 2.009969879310802e-05, + "loss": 0.2276, + "step": 15000, + "teacher_loss": 0.23591607809066772 + }, + { + "epoch": 2.71, + "eval_exact_match": 79.71617786187322, + "eval_f1": 87.17503057374732, + "step": 15000 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.3536294996738434, + "learning_rate": 2.0097562777132202e-05, + "loss": 0.221, + "step": 15001, + "teacher_loss": 0.20631879568099976 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.30649125576019287, + "learning_rate": 2.009542664428455e-05, + "loss": 0.2487, + "step": 15002, + "teacher_loss": 0.24228887259960175 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.4666973352432251, + "learning_rate": 2.0093290394614038e-05, + "loss": 0.25, + "step": 15003, + "teacher_loss": 0.22590233385562897 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.5467059016227722, + "learning_rate": 2.0091154028169655e-05, + "loss": 0.2859, + "step": 15004, + "teacher_loss": 0.2568756937980652 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.1568412482738495, + "learning_rate": 2.0089017545000372e-05, + "loss": 0.2014, + "step": 15005, + "teacher_loss": 0.20640623569488525 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.7227605581283569, + "learning_rate": 2.008688094515517e-05, + "loss": 0.3014, + "step": 15006, + "teacher_loss": 0.25458258390426636 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.4704834222793579, + "learning_rate": 2.0084744228683047e-05, + "loss": 0.2567, + "step": 15007, + "teacher_loss": 0.23297539353370667 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.20007620751857758, + "learning_rate": 2.0082607395632978e-05, + "loss": 0.1581, + "step": 15008, + "teacher_loss": 0.15343214571475983 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.9830831289291382, + "learning_rate": 2.008047044605396e-05, + "loss": 0.3883, + "step": 15009, + "teacher_loss": 0.3222392201423645 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.2133117914199829, + "learning_rate": 2.007833337999499e-05, + "loss": 0.1469, + "step": 15010, + "teacher_loss": 0.13955624401569366 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.19274112582206726, + "learning_rate": 2.0076196197505056e-05, + "loss": 0.2294, + "step": 15011, + "teacher_loss": 0.23343610763549805 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.43140918016433716, + "learning_rate": 2.0074058898633164e-05, + "loss": 0.2622, + "step": 15012, + "teacher_loss": 0.24334970116615295 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.24975115060806274, + "learning_rate": 2.007192148342832e-05, + "loss": 0.204, + "step": 15013, + "teacher_loss": 0.19893011450767517 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.19218358397483826, + "learning_rate": 2.006978395193952e-05, + "loss": 0.1982, + "step": 15014, + "teacher_loss": 0.19886913895606995 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.8757625222206116, + "learning_rate": 2.006764630421577e-05, + "loss": 0.3259, + "step": 15015, + "teacher_loss": 0.26476386189460754 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.2087867259979248, + "learning_rate": 2.006550854030609e-05, + "loss": 0.1731, + "step": 15016, + "teacher_loss": 0.16912755370140076 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.5303830504417419, + "learning_rate": 2.006337066025948e-05, + "loss": 0.219, + "step": 15017, + "teacher_loss": 0.18441784381866455 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.469784140586853, + "learning_rate": 2.0061232664124966e-05, + "loss": 0.3161, + "step": 15018, + "teacher_loss": 0.29902392625808716 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.17396605014801025, + "learning_rate": 2.005909455195156e-05, + "loss": 0.237, + "step": 15019, + "teacher_loss": 0.2440508008003235 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.5754035711288452, + "learning_rate": 2.0056956323788282e-05, + "loss": 0.2102, + "step": 15020, + "teacher_loss": 0.16965632140636444 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.7735744714736938, + "learning_rate": 2.005481797968416e-05, + "loss": 0.6237, + "step": 15021, + "teacher_loss": 0.6070912480354309 + }, + { + "compression_loss": 0.0, + "epoch": 2.71, + "label_loss": 0.31068155169487, + "learning_rate": 2.0052679519688216e-05, + "loss": 0.1982, + "step": 15022, + "teacher_loss": 0.18568764626979828 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.16337335109710693, + "learning_rate": 2.0050540943849477e-05, + "loss": 0.2176, + "step": 15023, + "teacher_loss": 0.2235870063304901 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.2505873441696167, + "learning_rate": 2.004840225221698e-05, + "loss": 0.1784, + "step": 15024, + "teacher_loss": 0.17039605975151062 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.4637059271335602, + "learning_rate": 2.0046263444839752e-05, + "loss": 0.2269, + "step": 15025, + "teacher_loss": 0.20060932636260986 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.796784520149231, + "learning_rate": 2.0044124521766836e-05, + "loss": 0.3761, + "step": 15026, + "teacher_loss": 0.3293894827365875 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.8234351277351379, + "learning_rate": 2.0041985483047265e-05, + "loss": 0.4887, + "step": 15027, + "teacher_loss": 0.45147186517715454 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.7973535656929016, + "learning_rate": 2.0039846328730082e-05, + "loss": 0.3512, + "step": 15028, + "teacher_loss": 0.30161386728286743 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.6982119679450989, + "learning_rate": 2.0037707058864343e-05, + "loss": 0.2315, + "step": 15029, + "teacher_loss": 0.17969286441802979 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.471706748008728, + "learning_rate": 2.0035567673499073e-05, + "loss": 0.2346, + "step": 15030, + "teacher_loss": 0.20824161171913147 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.39602822065353394, + "learning_rate": 2.0033428172683333e-05, + "loss": 0.2095, + "step": 15031, + "teacher_loss": 0.18878960609436035 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.9143403768539429, + "learning_rate": 2.003128855646619e-05, + "loss": 0.5763, + "step": 15032, + "teacher_loss": 0.538723349571228 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.1988438367843628, + "learning_rate": 2.0029148824896672e-05, + "loss": 0.1773, + "step": 15033, + "teacher_loss": 0.17487892508506775 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.4166041910648346, + "learning_rate": 2.0027008978023853e-05, + "loss": 0.2585, + "step": 15034, + "teacher_loss": 0.2409186214208603 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.5560095906257629, + "learning_rate": 2.0024869015896793e-05, + "loss": 0.3295, + "step": 15035, + "teacher_loss": 0.3043414354324341 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.4813695549964905, + "learning_rate": 2.0022728938564548e-05, + "loss": 0.2326, + "step": 15036, + "teacher_loss": 0.20496976375579834 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.47685176134109497, + "learning_rate": 2.002058874607619e-05, + "loss": 0.2391, + "step": 15037, + "teacher_loss": 0.21265339851379395 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.7392712831497192, + "learning_rate": 2.0018448438480784e-05, + "loss": 0.2811, + "step": 15038, + "teacher_loss": 0.2301851511001587 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.28946763277053833, + "learning_rate": 2.0016308015827402e-05, + "loss": 0.1964, + "step": 15039, + "teacher_loss": 0.18610183894634247 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.3137316107749939, + "learning_rate": 2.0014167478165117e-05, + "loss": 0.2067, + "step": 15040, + "teacher_loss": 0.19480668008327484 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.4612748324871063, + "learning_rate": 2.0012026825543002e-05, + "loss": 0.2401, + "step": 15041, + "teacher_loss": 0.215494304895401 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.3548908531665802, + "learning_rate": 2.0009886058010137e-05, + "loss": 0.2915, + "step": 15042, + "teacher_loss": 0.284482479095459 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.22971507906913757, + "learning_rate": 2.000774517561561e-05, + "loss": 0.2425, + "step": 15043, + "teacher_loss": 0.2439051866531372 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.4746737778186798, + "learning_rate": 2.0005604178408498e-05, + "loss": 0.2731, + "step": 15044, + "teacher_loss": 0.2507379651069641 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.4142010807991028, + "learning_rate": 2.000346306643789e-05, + "loss": 0.1577, + "step": 15045, + "teacher_loss": 0.12919777631759644 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.26875972747802734, + "learning_rate": 2.0001321839752877e-05, + "loss": 0.1826, + "step": 15046, + "teacher_loss": 0.17305982112884521 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.5402956008911133, + "learning_rate": 1.9999180498402547e-05, + "loss": 0.3649, + "step": 15047, + "teacher_loss": 0.34537264704704285 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.4461023807525635, + "learning_rate": 1.9997039042435997e-05, + "loss": 0.2905, + "step": 15048, + "teacher_loss": 0.2732193171977997 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.2971504330635071, + "learning_rate": 1.9994897471902325e-05, + "loss": 0.1938, + "step": 15049, + "teacher_loss": 0.18228499591350555 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.6897265911102295, + "learning_rate": 1.9992755786850626e-05, + "loss": 0.3493, + "step": 15050, + "teacher_loss": 0.3114684224128723 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.5948957800865173, + "learning_rate": 1.999061398733001e-05, + "loss": 0.4352, + "step": 15051, + "teacher_loss": 0.41743582487106323 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.36130496859550476, + "learning_rate": 1.9988472073389578e-05, + "loss": 0.2496, + "step": 15052, + "teacher_loss": 0.23721513152122498 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.4595414698123932, + "learning_rate": 1.998633004507843e-05, + "loss": 0.2672, + "step": 15053, + "teacher_loss": 0.2458733767271042 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.42380625009536743, + "learning_rate": 1.9984187902445696e-05, + "loss": 0.2295, + "step": 15054, + "teacher_loss": 0.20793843269348145 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.44624564051628113, + "learning_rate": 1.9982045645540464e-05, + "loss": 0.442, + "step": 15055, + "teacher_loss": 0.44155222177505493 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.34407877922058105, + "learning_rate": 1.9979903274411864e-05, + "loss": 0.2251, + "step": 15056, + "teacher_loss": 0.21184967458248138 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.2622450292110443, + "learning_rate": 1.9977760789109024e-05, + "loss": 0.2703, + "step": 15057, + "teacher_loss": 0.2712153196334839 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.24984706938266754, + "learning_rate": 1.9975618189681043e-05, + "loss": 0.2316, + "step": 15058, + "teacher_loss": 0.22957536578178406 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.4520396590232849, + "learning_rate": 1.9973475476177057e-05, + "loss": 0.2484, + "step": 15059, + "teacher_loss": 0.22573867440223694 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.375400185585022, + "learning_rate": 1.997133264864619e-05, + "loss": 0.3091, + "step": 15060, + "teacher_loss": 0.30178260803222656 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 1.1193232536315918, + "learning_rate": 1.996918970713757e-05, + "loss": 0.6195, + "step": 15061, + "teacher_loss": 0.5639264583587646 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.5159069299697876, + "learning_rate": 1.9967046651700332e-05, + "loss": 0.2446, + "step": 15062, + "teacher_loss": 0.2144462913274765 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.6716480255126953, + "learning_rate": 1.99649034823836e-05, + "loss": 0.5184, + "step": 15063, + "teacher_loss": 0.5013296008110046 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.4155537486076355, + "learning_rate": 1.996276019923652e-05, + "loss": 0.2657, + "step": 15064, + "teacher_loss": 0.2490987777709961 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.2730403542518616, + "learning_rate": 1.996061680230823e-05, + "loss": 0.2524, + "step": 15065, + "teacher_loss": 0.2500574290752411 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.6909595131874084, + "learning_rate": 1.9958473291647865e-05, + "loss": 0.2662, + "step": 15066, + "teacher_loss": 0.21902316808700562 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.30844852328300476, + "learning_rate": 1.9956329667304577e-05, + "loss": 0.2243, + "step": 15067, + "teacher_loss": 0.21495559811592102 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.16114582121372223, + "learning_rate": 1.995418592932751e-05, + "loss": 0.1739, + "step": 15068, + "teacher_loss": 0.1752689778804779 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.4944458603858948, + "learning_rate": 1.995204207776581e-05, + "loss": 0.283, + "step": 15069, + "teacher_loss": 0.2594802975654602 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.8666689991950989, + "learning_rate": 1.9949898112668634e-05, + "loss": 0.3587, + "step": 15070, + "teacher_loss": 0.30222347378730774 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.38930201530456543, + "learning_rate": 1.9947754034085134e-05, + "loss": 0.3235, + "step": 15071, + "teacher_loss": 0.3161659836769104 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.48068174719810486, + "learning_rate": 1.9945609842064468e-05, + "loss": 0.2449, + "step": 15072, + "teacher_loss": 0.21873216331005096 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.22470299899578094, + "learning_rate": 1.9943465536655793e-05, + "loss": 0.1702, + "step": 15073, + "teacher_loss": 0.16413387656211853 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.22504305839538574, + "learning_rate": 1.994132111790828e-05, + "loss": 0.2852, + "step": 15074, + "teacher_loss": 0.2918716073036194 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.25561243295669556, + "learning_rate": 1.9939176585871085e-05, + "loss": 0.209, + "step": 15075, + "teacher_loss": 0.20380191504955292 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.7776265144348145, + "learning_rate": 1.993703194059338e-05, + "loss": 0.2858, + "step": 15076, + "teacher_loss": 0.23111248016357422 + }, + { + "compression_loss": 0.0, + "epoch": 2.72, + "label_loss": 0.41156893968582153, + "learning_rate": 1.9934887182124337e-05, + "loss": 0.3208, + "step": 15077, + "teacher_loss": 0.3106657564640045 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.5161766409873962, + "learning_rate": 1.993274231051313e-05, + "loss": 0.2744, + "step": 15078, + "teacher_loss": 0.2475668489933014 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.9737175703048706, + "learning_rate": 1.993059732580892e-05, + "loss": 0.5216, + "step": 15079, + "teacher_loss": 0.4713541269302368 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.5958664417266846, + "learning_rate": 1.9928452228060903e-05, + "loss": 0.2761, + "step": 15080, + "teacher_loss": 0.24053451418876648 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.680786669254303, + "learning_rate": 1.992630701731825e-05, + "loss": 0.2853, + "step": 15081, + "teacher_loss": 0.24131283164024353 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.24362145364284515, + "learning_rate": 1.992416169363015e-05, + "loss": 0.2423, + "step": 15082, + "teacher_loss": 0.24220535159111023 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.8904927372932434, + "learning_rate": 1.9922016257045782e-05, + "loss": 0.3639, + "step": 15083, + "teacher_loss": 0.3054329454898834 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.1477115899324417, + "learning_rate": 1.991987070761434e-05, + "loss": 0.1593, + "step": 15084, + "teacher_loss": 0.16060245037078857 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.42675545811653137, + "learning_rate": 1.9917725045385017e-05, + "loss": 0.3022, + "step": 15085, + "teacher_loss": 0.28839045763015747 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.5468289852142334, + "learning_rate": 1.9915579270406993e-05, + "loss": 0.2665, + "step": 15086, + "teacher_loss": 0.23534198105335236 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.8861464262008667, + "learning_rate": 1.9913433382729488e-05, + "loss": 0.4564, + "step": 15087, + "teacher_loss": 0.40859711170196533 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.18965277075767517, + "learning_rate": 1.9911287382401675e-05, + "loss": 0.1957, + "step": 15088, + "teacher_loss": 0.1964040994644165 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.4445428252220154, + "learning_rate": 1.9909141269472772e-05, + "loss": 0.4737, + "step": 15089, + "teacher_loss": 0.47697240114212036 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.3543934226036072, + "learning_rate": 1.9906995043991978e-05, + "loss": 0.2033, + "step": 15090, + "teacher_loss": 0.18648485839366913 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.2961617112159729, + "learning_rate": 1.9904848706008498e-05, + "loss": 0.1925, + "step": 15091, + "teacher_loss": 0.18095944821834564 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.35567817091941833, + "learning_rate": 1.990270225557155e-05, + "loss": 0.2438, + "step": 15092, + "teacher_loss": 0.2313770353794098 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.4605990946292877, + "learning_rate": 1.9900555692730325e-05, + "loss": 0.339, + "step": 15093, + "teacher_loss": 0.32544130086898804 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.5304573178291321, + "learning_rate": 1.989840901753406e-05, + "loss": 0.2668, + "step": 15094, + "teacher_loss": 0.23751601576805115 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.935044527053833, + "learning_rate": 1.989626223003196e-05, + "loss": 0.3426, + "step": 15095, + "teacher_loss": 0.2767980694770813 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.24674253165721893, + "learning_rate": 1.9894115330273245e-05, + "loss": 0.2247, + "step": 15096, + "teacher_loss": 0.22226402163505554 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.544734001159668, + "learning_rate": 1.9891968318307143e-05, + "loss": 0.2859, + "step": 15097, + "teacher_loss": 0.25711536407470703 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.18568967282772064, + "learning_rate": 1.988982119418287e-05, + "loss": 0.2077, + "step": 15098, + "teacher_loss": 0.210148423910141 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.3483494520187378, + "learning_rate": 1.988767395794966e-05, + "loss": 0.3527, + "step": 15099, + "teacher_loss": 0.3531569540500641 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.33492332696914673, + "learning_rate": 1.988552660965674e-05, + "loss": 0.3481, + "step": 15100, + "teacher_loss": 0.3495703339576721 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.30936503410339355, + "learning_rate": 1.988337914935334e-05, + "loss": 0.2308, + "step": 15101, + "teacher_loss": 0.22202084958553314 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.5264618992805481, + "learning_rate": 1.98812315770887e-05, + "loss": 0.2171, + "step": 15102, + "teacher_loss": 0.18275868892669678 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.4199342429637909, + "learning_rate": 1.9879083892912054e-05, + "loss": 0.2189, + "step": 15103, + "teacher_loss": 0.19651193916797638 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.31059807538986206, + "learning_rate": 1.987693609687264e-05, + "loss": 0.1869, + "step": 15104, + "teacher_loss": 0.17317906022071838 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.4697466492652893, + "learning_rate": 1.9874788189019707e-05, + "loss": 0.2632, + "step": 15105, + "teacher_loss": 0.24027252197265625 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.17916375398635864, + "learning_rate": 1.9872640169402496e-05, + "loss": 0.2371, + "step": 15106, + "teacher_loss": 0.24357305467128754 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.298399955034256, + "learning_rate": 1.9870492038070255e-05, + "loss": 0.2646, + "step": 15107, + "teacher_loss": 0.2608868479728699 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.2749473452568054, + "learning_rate": 1.9868343795072228e-05, + "loss": 0.1871, + "step": 15108, + "teacher_loss": 0.17735815048217773 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.46526551246643066, + "learning_rate": 1.986619544045768e-05, + "loss": 0.2129, + "step": 15109, + "teacher_loss": 0.1848325878381729 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.5566456317901611, + "learning_rate": 1.986404697427586e-05, + "loss": 0.293, + "step": 15110, + "teacher_loss": 0.2636886239051819 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.7540261745452881, + "learning_rate": 1.9861898396576023e-05, + "loss": 0.3267, + "step": 15111, + "teacher_loss": 0.2791852056980133 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.5506866574287415, + "learning_rate": 1.9859749707407436e-05, + "loss": 0.2305, + "step": 15112, + "teacher_loss": 0.1949162483215332 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.2481362521648407, + "learning_rate": 1.9857600906819356e-05, + "loss": 0.1977, + "step": 15113, + "teacher_loss": 0.19209900498390198 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.11785888671875, + "learning_rate": 1.985545199486105e-05, + "loss": 0.1877, + "step": 15114, + "teacher_loss": 0.1954491138458252 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.18139125406742096, + "learning_rate": 1.985330297158179e-05, + "loss": 0.2199, + "step": 15115, + "teacher_loss": 0.22421470284461975 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.4692199230194092, + "learning_rate": 1.985115383703084e-05, + "loss": 0.255, + "step": 15116, + "teacher_loss": 0.23124787211418152 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.21738798916339874, + "learning_rate": 1.9849004591257488e-05, + "loss": 0.22, + "step": 15117, + "teacher_loss": 0.22025524079799652 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.2938863933086395, + "learning_rate": 1.9846855234310985e-05, + "loss": 0.1928, + "step": 15118, + "teacher_loss": 0.18157872557640076 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.3548092544078827, + "learning_rate": 1.984470576624063e-05, + "loss": 0.2173, + "step": 15119, + "teacher_loss": 0.2020546793937683 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.33035704493522644, + "learning_rate": 1.9842556187095695e-05, + "loss": 0.3313, + "step": 15120, + "teacher_loss": 0.33141976594924927 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.3157961368560791, + "learning_rate": 1.9840406496925465e-05, + "loss": 0.2067, + "step": 15121, + "teacher_loss": 0.19453680515289307 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.6413888335227966, + "learning_rate": 1.983825669577923e-05, + "loss": 0.2912, + "step": 15122, + "teacher_loss": 0.2522585093975067 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.708191990852356, + "learning_rate": 1.983610678370627e-05, + "loss": 0.7878, + "step": 15123, + "teacher_loss": 0.7966430187225342 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.8005844354629517, + "learning_rate": 1.9833956760755882e-05, + "loss": 0.3203, + "step": 15124, + "teacher_loss": 0.2668999433517456 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.26291677355766296, + "learning_rate": 1.983180662697736e-05, + "loss": 0.2303, + "step": 15125, + "teacher_loss": 0.2266272008419037 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.25841909646987915, + "learning_rate": 1.9829656382419997e-05, + "loss": 0.3125, + "step": 15126, + "teacher_loss": 0.3185466527938843 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.28554433584213257, + "learning_rate": 1.9827506027133094e-05, + "loss": 0.202, + "step": 15127, + "teacher_loss": 0.19270142912864685 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.7003921270370483, + "learning_rate": 1.9825355561165953e-05, + "loss": 0.2623, + "step": 15128, + "teacher_loss": 0.21357497572898865 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.1778913140296936, + "learning_rate": 1.9823204984567873e-05, + "loss": 0.1686, + "step": 15129, + "teacher_loss": 0.16754662990570068 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.7207590937614441, + "learning_rate": 1.982105429738817e-05, + "loss": 0.2455, + "step": 15130, + "teacher_loss": 0.1927299201488495 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.4375700354576111, + "learning_rate": 1.981890349967614e-05, + "loss": 0.2488, + "step": 15131, + "teacher_loss": 0.2278408408164978 + }, + { + "compression_loss": 0.0, + "epoch": 2.73, + "label_loss": 0.21476247906684875, + "learning_rate": 1.98167525914811e-05, + "loss": 0.1755, + "step": 15132, + "teacher_loss": 0.1711852252483368 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.19516701996326447, + "learning_rate": 1.9814601572852368e-05, + "loss": 0.2367, + "step": 15133, + "teacher_loss": 0.24130795896053314 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.44253799319267273, + "learning_rate": 1.981245044383925e-05, + "loss": 0.1928, + "step": 15134, + "teacher_loss": 0.16509635746479034 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.23452308773994446, + "learning_rate": 1.981029920449108e-05, + "loss": 0.2802, + "step": 15135, + "teacher_loss": 0.28531378507614136 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.29700279235839844, + "learning_rate": 1.980814785485717e-05, + "loss": 0.2097, + "step": 15136, + "teacher_loss": 0.19998842477798462 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.7240496873855591, + "learning_rate": 1.9805996394986844e-05, + "loss": 0.4021, + "step": 15137, + "teacher_loss": 0.366362065076828 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.4322538375854492, + "learning_rate": 1.9803844824929425e-05, + "loss": 0.2576, + "step": 15138, + "teacher_loss": 0.2382260411977768 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.13878756761550903, + "learning_rate": 1.9801693144734256e-05, + "loss": 0.1624, + "step": 15139, + "teacher_loss": 0.16496823728084564 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.4488065242767334, + "learning_rate": 1.9799541354450652e-05, + "loss": 0.2146, + "step": 15140, + "teacher_loss": 0.18863120675086975 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.17453351616859436, + "learning_rate": 1.9797389454127957e-05, + "loss": 0.1358, + "step": 15141, + "teacher_loss": 0.13149572908878326 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.48861896991729736, + "learning_rate": 1.9795237443815507e-05, + "loss": 0.475, + "step": 15142, + "teacher_loss": 0.47346892952919006 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.5806810855865479, + "learning_rate": 1.9793085323562633e-05, + "loss": 0.2578, + "step": 15143, + "teacher_loss": 0.221955806016922 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.18329587578773499, + "learning_rate": 1.9790933093418692e-05, + "loss": 0.1693, + "step": 15144, + "teacher_loss": 0.1677565574645996 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.8105214834213257, + "learning_rate": 1.9788780753433016e-05, + "loss": 0.368, + "step": 15145, + "teacher_loss": 0.3188292384147644 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.6586999893188477, + "learning_rate": 1.978662830365495e-05, + "loss": 0.2157, + "step": 15146, + "teacher_loss": 0.16644065082073212 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.4170007109642029, + "learning_rate": 1.9784475744133855e-05, + "loss": 0.3969, + "step": 15147, + "teacher_loss": 0.39466798305511475 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.6390199661254883, + "learning_rate": 1.9782323074919066e-05, + "loss": 0.3034, + "step": 15148, + "teacher_loss": 0.2661534547805786 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.32657966017723083, + "learning_rate": 1.978017029605996e-05, + "loss": 0.2315, + "step": 15149, + "teacher_loss": 0.22088859975337982 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.4098842740058899, + "learning_rate": 1.977801740760587e-05, + "loss": 0.2898, + "step": 15150, + "teacher_loss": 0.2765064239501953 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.4801176190376282, + "learning_rate": 1.977586440960617e-05, + "loss": 0.3448, + "step": 15151, + "teacher_loss": 0.3297881484031677 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.5493592023849487, + "learning_rate": 1.977371130211022e-05, + "loss": 0.2631, + "step": 15152, + "teacher_loss": 0.23129115998744965 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.4405379891395569, + "learning_rate": 1.977155808516738e-05, + "loss": 0.2348, + "step": 15153, + "teacher_loss": 0.21195551753044128 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.4689837098121643, + "learning_rate": 1.9769404758827017e-05, + "loss": 0.2075, + "step": 15154, + "teacher_loss": 0.1784050464630127 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.6527721285820007, + "learning_rate": 1.9767251323138508e-05, + "loss": 0.4971, + "step": 15155, + "teacher_loss": 0.4797801375389099 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.23711979389190674, + "learning_rate": 1.976509777815121e-05, + "loss": 0.212, + "step": 15156, + "teacher_loss": 0.20919150114059448 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 1.0806182622909546, + "learning_rate": 1.9762944123914517e-05, + "loss": 0.7386, + "step": 15157, + "teacher_loss": 0.7006382346153259 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 1.246617078781128, + "learning_rate": 1.9760790360477786e-05, + "loss": 0.3176, + "step": 15158, + "teacher_loss": 0.21440258622169495 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.17018583416938782, + "learning_rate": 1.9758636487890408e-05, + "loss": 0.2284, + "step": 15159, + "teacher_loss": 0.2348358929157257 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.4834911823272705, + "learning_rate": 1.975648250620177e-05, + "loss": 0.2456, + "step": 15160, + "teacher_loss": 0.21916499733924866 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.7042248845100403, + "learning_rate": 1.9754328415461243e-05, + "loss": 0.5092, + "step": 15161, + "teacher_loss": 0.4874890446662903 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.4873436987400055, + "learning_rate": 1.9752174215718217e-05, + "loss": 0.3298, + "step": 15162, + "teacher_loss": 0.3122524917125702 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.18083898723125458, + "learning_rate": 1.975001990702209e-05, + "loss": 0.2131, + "step": 15163, + "teacher_loss": 0.21665626764297485 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.5595321655273438, + "learning_rate": 1.9747865489422242e-05, + "loss": 0.2676, + "step": 15164, + "teacher_loss": 0.2351897805929184 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.5304072499275208, + "learning_rate": 1.9745710962968075e-05, + "loss": 0.281, + "step": 15165, + "teacher_loss": 0.2532690465450287 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.23622167110443115, + "learning_rate": 1.974355632770899e-05, + "loss": 0.2966, + "step": 15166, + "teacher_loss": 0.30329370498657227 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.5728350877761841, + "learning_rate": 1.974140158369437e-05, + "loss": 0.2289, + "step": 15167, + "teacher_loss": 0.19067008793354034 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.37634527683258057, + "learning_rate": 1.9739246730973633e-05, + "loss": 0.1893, + "step": 15168, + "teacher_loss": 0.1685633361339569 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.30013829469680786, + "learning_rate": 1.973709176959618e-05, + "loss": 0.2164, + "step": 15169, + "teacher_loss": 0.20713499188423157 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 1.0726590156555176, + "learning_rate": 1.973493669961141e-05, + "loss": 0.2688, + "step": 15170, + "teacher_loss": 0.17949554324150085 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.43128281831741333, + "learning_rate": 1.973278152106874e-05, + "loss": 0.1854, + "step": 15171, + "teacher_loss": 0.15809732675552368 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.2755378484725952, + "learning_rate": 1.973062623401758e-05, + "loss": 0.2203, + "step": 15172, + "teacher_loss": 0.21418413519859314 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.3010275959968567, + "learning_rate": 1.972847083850734e-05, + "loss": 0.1941, + "step": 15173, + "teacher_loss": 0.18224990367889404 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.5239337086677551, + "learning_rate": 1.9726315334587446e-05, + "loss": 0.2558, + "step": 15174, + "teacher_loss": 0.22604581713676453 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.4318421185016632, + "learning_rate": 1.9724159722307306e-05, + "loss": 0.2336, + "step": 15175, + "teacher_loss": 0.21155688166618347 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.2940051555633545, + "learning_rate": 1.972200400171635e-05, + "loss": 0.1698, + "step": 15176, + "teacher_loss": 0.15604063868522644 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.5416123867034912, + "learning_rate": 1.9719848172864004e-05, + "loss": 0.3919, + "step": 15177, + "teacher_loss": 0.37521782517433167 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.5701338648796082, + "learning_rate": 1.9717692235799682e-05, + "loss": 0.2458, + "step": 15178, + "teacher_loss": 0.20977017283439636 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.36374589800834656, + "learning_rate": 1.971553619057282e-05, + "loss": 0.2067, + "step": 15179, + "teacher_loss": 0.18928195536136627 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.4188700318336487, + "learning_rate": 1.9713380037232863e-05, + "loss": 0.264, + "step": 15180, + "teacher_loss": 0.24675922095775604 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.24901972711086273, + "learning_rate": 1.9711223775829225e-05, + "loss": 0.2156, + "step": 15181, + "teacher_loss": 0.21192272007465363 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.22387628257274628, + "learning_rate": 1.9709067406411352e-05, + "loss": 0.158, + "step": 15182, + "teacher_loss": 0.15069857239723206 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.5138344764709473, + "learning_rate": 1.9706910929028684e-05, + "loss": 0.35, + "step": 15183, + "teacher_loss": 0.33182626962661743 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.5507846474647522, + "learning_rate": 1.9704754343730655e-05, + "loss": 0.2894, + "step": 15184, + "teacher_loss": 0.2603911757469177 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.609535813331604, + "learning_rate": 1.9702597650566723e-05, + "loss": 0.2835, + "step": 15185, + "teacher_loss": 0.2472611963748932 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.34522104263305664, + "learning_rate": 1.9700440849586316e-05, + "loss": 0.1808, + "step": 15186, + "teacher_loss": 0.1625167727470398 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.5362224578857422, + "learning_rate": 1.9698283940838896e-05, + "loss": 0.4922, + "step": 15187, + "teacher_loss": 0.48734956979751587 + }, + { + "compression_loss": 0.0, + "epoch": 2.74, + "label_loss": 0.9018310904502869, + "learning_rate": 1.9696126924373917e-05, + "loss": 0.417, + "step": 15188, + "teacher_loss": 0.3631435036659241 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.20408844947814941, + "learning_rate": 1.9693969800240825e-05, + "loss": 0.198, + "step": 15189, + "teacher_loss": 0.19735214114189148 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.29109904170036316, + "learning_rate": 1.9691812568489076e-05, + "loss": 0.2257, + "step": 15190, + "teacher_loss": 0.21845673024654388 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.4749983251094818, + "learning_rate": 1.9689655229168134e-05, + "loss": 0.2314, + "step": 15191, + "teacher_loss": 0.20435748994350433 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.11566022038459778, + "learning_rate": 1.9687497782327456e-05, + "loss": 0.2273, + "step": 15192, + "teacher_loss": 0.23967286944389343 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.487579882144928, + "learning_rate": 1.968534022801651e-05, + "loss": 0.2777, + "step": 15193, + "teacher_loss": 0.25442981719970703 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.21503601968288422, + "learning_rate": 1.9683182566284753e-05, + "loss": 0.2347, + "step": 15194, + "teacher_loss": 0.23686090111732483 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.17470958828926086, + "learning_rate": 1.968102479718167e-05, + "loss": 0.1602, + "step": 15195, + "teacher_loss": 0.1586049348115921 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.3236986994743347, + "learning_rate": 1.967886692075672e-05, + "loss": 0.1934, + "step": 15196, + "teacher_loss": 0.17886868119239807 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.4406489431858063, + "learning_rate": 1.967670893705938e-05, + "loss": 0.2463, + "step": 15197, + "teacher_loss": 0.22465577721595764 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.6793450117111206, + "learning_rate": 1.967455084613912e-05, + "loss": 0.2646, + "step": 15198, + "teacher_loss": 0.21854379773139954 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.2680261731147766, + "learning_rate": 1.967239264804543e-05, + "loss": 0.2216, + "step": 15199, + "teacher_loss": 0.21641956269741058 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.23984336853027344, + "learning_rate": 1.9670234342827783e-05, + "loss": 0.2353, + "step": 15200, + "teacher_loss": 0.23474860191345215 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.09922236204147339, + "learning_rate": 1.966807593053566e-05, + "loss": 0.1285, + "step": 15201, + "teacher_loss": 0.13175831735134125 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.6094710826873779, + "learning_rate": 1.9665917411218562e-05, + "loss": 0.2467, + "step": 15202, + "teacher_loss": 0.2063438594341278 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.28626570105552673, + "learning_rate": 1.966375878492596e-05, + "loss": 0.2251, + "step": 15203, + "teacher_loss": 0.21829816699028015 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.47271013259887695, + "learning_rate": 1.9661600051707355e-05, + "loss": 0.5874, + "step": 15204, + "teacher_loss": 0.6001725196838379 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 1.128417730331421, + "learning_rate": 1.9659441211612234e-05, + "loss": 0.2597, + "step": 15205, + "teacher_loss": 0.16312867403030396 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.6140072345733643, + "learning_rate": 1.9657282264690095e-05, + "loss": 0.2533, + "step": 15206, + "teacher_loss": 0.21319587528705597 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.2668343782424927, + "learning_rate": 1.965512321099044e-05, + "loss": 0.2536, + "step": 15207, + "teacher_loss": 0.25213319063186646 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.7780238389968872, + "learning_rate": 1.9652964050562766e-05, + "loss": 0.2169, + "step": 15208, + "teacher_loss": 0.1545523703098297 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.7898622155189514, + "learning_rate": 1.9650804783456575e-05, + "loss": 0.569, + "step": 15209, + "teacher_loss": 0.5444784164428711 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.7480657696723938, + "learning_rate": 1.964864540972138e-05, + "loss": 0.3207, + "step": 15210, + "teacher_loss": 0.27317607402801514 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.426583468914032, + "learning_rate": 1.9646485929406676e-05, + "loss": 0.4055, + "step": 15211, + "teacher_loss": 0.40312451124191284 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.2943565845489502, + "learning_rate": 1.9644326342561983e-05, + "loss": 0.1782, + "step": 15212, + "teacher_loss": 0.16524159908294678 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.4286937117576599, + "learning_rate": 1.964216664923681e-05, + "loss": 0.1984, + "step": 15213, + "teacher_loss": 0.17279568314552307 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.6582702398300171, + "learning_rate": 1.964000684948067e-05, + "loss": 0.2786, + "step": 15214, + "teacher_loss": 0.2364538609981537 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.18538734316825867, + "learning_rate": 1.9637846943343093e-05, + "loss": 0.1418, + "step": 15215, + "teacher_loss": 0.13701054453849792 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.44110599160194397, + "learning_rate": 1.9635686930873587e-05, + "loss": 0.3109, + "step": 15216, + "teacher_loss": 0.29638004302978516 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.6912287473678589, + "learning_rate": 1.963352681212168e-05, + "loss": 0.545, + "step": 15217, + "teacher_loss": 0.5287714004516602 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.23366637527942657, + "learning_rate": 1.9631366587136893e-05, + "loss": 0.1803, + "step": 15218, + "teacher_loss": 0.17433884739875793 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.3281964063644409, + "learning_rate": 1.9629206255968754e-05, + "loss": 0.202, + "step": 15219, + "teacher_loss": 0.18794460594654083 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.4137471318244934, + "learning_rate": 1.9627045818666796e-05, + "loss": 0.2501, + "step": 15220, + "teacher_loss": 0.23189963400363922 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.7862658500671387, + "learning_rate": 1.9624885275280553e-05, + "loss": 0.3132, + "step": 15221, + "teacher_loss": 0.26060980558395386 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.2272559106349945, + "learning_rate": 1.9622724625859553e-05, + "loss": 0.2102, + "step": 15222, + "teacher_loss": 0.2083524465560913 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.1550198644399643, + "learning_rate": 1.962056387045334e-05, + "loss": 0.1537, + "step": 15223, + "teacher_loss": 0.1535821259021759 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.4116232395172119, + "learning_rate": 1.961840300911145e-05, + "loss": 0.2618, + "step": 15224, + "teacher_loss": 0.2451116293668747 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.2854640483856201, + "learning_rate": 1.961624204188342e-05, + "loss": 0.2055, + "step": 15225, + "teacher_loss": 0.196599543094635 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.2560485005378723, + "learning_rate": 1.9614080968818807e-05, + "loss": 0.3901, + "step": 15226, + "teacher_loss": 0.4049775302410126 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.3621958792209625, + "learning_rate": 1.9611919789967148e-05, + "loss": 0.1969, + "step": 15227, + "teacher_loss": 0.17855460941791534 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.33230215311050415, + "learning_rate": 1.9609758505377995e-05, + "loss": 0.3265, + "step": 15228, + "teacher_loss": 0.3258967399597168 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.3078997731208801, + "learning_rate": 1.9607597115100905e-05, + "loss": 0.3191, + "step": 15229, + "teacher_loss": 0.32038360834121704 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.2900291085243225, + "learning_rate": 1.9605435619185424e-05, + "loss": 0.2159, + "step": 15230, + "teacher_loss": 0.20765212178230286 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.3844910264015198, + "learning_rate": 1.960327401768111e-05, + "loss": 0.1709, + "step": 15231, + "teacher_loss": 0.14715400338172913 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.3461059331893921, + "learning_rate": 1.9601112310637526e-05, + "loss": 0.2043, + "step": 15232, + "teacher_loss": 0.1885887086391449 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.31589266657829285, + "learning_rate": 1.959895049810423e-05, + "loss": 0.25, + "step": 15233, + "teacher_loss": 0.24262520670890808 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.41189220547676086, + "learning_rate": 1.959678858013079e-05, + "loss": 0.2762, + "step": 15234, + "teacher_loss": 0.261086642742157 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.2599945664405823, + "learning_rate": 1.9594626556766772e-05, + "loss": 0.2345, + "step": 15235, + "teacher_loss": 0.23168711364269257 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.3744836449623108, + "learning_rate": 1.9592464428061734e-05, + "loss": 0.2579, + "step": 15236, + "teacher_loss": 0.24489115178585052 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.6489717364311218, + "learning_rate": 1.9590302194065267e-05, + "loss": 0.2525, + "step": 15237, + "teacher_loss": 0.20842532813549042 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 1.0973368883132935, + "learning_rate": 1.9588139854826927e-05, + "loss": 0.342, + "step": 15238, + "teacher_loss": 0.2580951750278473 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.25282567739486694, + "learning_rate": 1.9585977410396295e-05, + "loss": 0.2795, + "step": 15239, + "teacher_loss": 0.282443106174469 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.28659236431121826, + "learning_rate": 1.9583814860822954e-05, + "loss": 0.2107, + "step": 15240, + "teacher_loss": 0.20226648449897766 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.7333790063858032, + "learning_rate": 1.9581652206156477e-05, + "loss": 0.2816, + "step": 15241, + "teacher_loss": 0.2314022332429886 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.45125460624694824, + "learning_rate": 1.9579489446446456e-05, + "loss": 0.3062, + "step": 15242, + "teacher_loss": 0.290036678314209 + }, + { + "compression_loss": 0.0, + "epoch": 2.75, + "label_loss": 0.07484418898820877, + "learning_rate": 1.957732658174247e-05, + "loss": 0.1293, + "step": 15243, + "teacher_loss": 0.13537269830703735 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.4858418107032776, + "learning_rate": 1.9575163612094103e-05, + "loss": 0.2672, + "step": 15244, + "teacher_loss": 0.2429550290107727 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.4726579785346985, + "learning_rate": 1.957300053755096e-05, + "loss": 0.2457, + "step": 15245, + "teacher_loss": 0.2204376459121704 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.4978002905845642, + "learning_rate": 1.957083735816262e-05, + "loss": 0.2308, + "step": 15246, + "teacher_loss": 0.20113354921340942 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.6288567781448364, + "learning_rate": 1.9568674073978685e-05, + "loss": 0.2816, + "step": 15247, + "teacher_loss": 0.24298366904258728 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.3875372111797333, + "learning_rate": 1.9566510685048752e-05, + "loss": 0.2473, + "step": 15248, + "teacher_loss": 0.2317335158586502 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.4088730812072754, + "learning_rate": 1.956434719142242e-05, + "loss": 0.2391, + "step": 15249, + "teacher_loss": 0.22022981941699982 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.40942302346229553, + "learning_rate": 1.956218359314929e-05, + "loss": 0.2978, + "step": 15250, + "teacher_loss": 0.28534507751464844 + }, + { + "epoch": 2.76, + "eval_exact_match": 79.7445600756859, + "eval_f1": 87.1903482811981, + "step": 15250 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.41600728034973145, + "learning_rate": 1.956001989027897e-05, + "loss": 0.3106, + "step": 15251, + "teacher_loss": 0.29887473583221436 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.3719387650489807, + "learning_rate": 1.9557856082861067e-05, + "loss": 0.2447, + "step": 15252, + "teacher_loss": 0.23055724799633026 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.38669300079345703, + "learning_rate": 1.9555692170945185e-05, + "loss": 0.1926, + "step": 15253, + "teacher_loss": 0.17103198170661926 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.8626641035079956, + "learning_rate": 1.9553528154580946e-05, + "loss": 0.3235, + "step": 15254, + "teacher_loss": 0.26356542110443115 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.3669757843017578, + "learning_rate": 1.9551364033817955e-05, + "loss": 0.2944, + "step": 15255, + "teacher_loss": 0.2863014042377472 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.1520523577928543, + "learning_rate": 1.9549199808705834e-05, + "loss": 0.1871, + "step": 15256, + "teacher_loss": 0.19100482761859894 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.4765346944332123, + "learning_rate": 1.9547035479294196e-05, + "loss": 0.3719, + "step": 15257, + "teacher_loss": 0.3602708578109741 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.5200376510620117, + "learning_rate": 1.9544871045632667e-05, + "loss": 0.3026, + "step": 15258, + "teacher_loss": 0.27845266461372375 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.5695130228996277, + "learning_rate": 1.954270650777088e-05, + "loss": 0.2594, + "step": 15259, + "teacher_loss": 0.22493885457515717 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.41902047395706177, + "learning_rate": 1.9540541865758446e-05, + "loss": 0.2287, + "step": 15260, + "teacher_loss": 0.20757697522640228 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.32884740829467773, + "learning_rate": 1.9538377119645e-05, + "loss": 0.2366, + "step": 15261, + "teacher_loss": 0.2263159155845642 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.17576825618743896, + "learning_rate": 1.9536212269480175e-05, + "loss": 0.3022, + "step": 15262, + "teacher_loss": 0.31626319885253906 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.3241882920265198, + "learning_rate": 1.95340473153136e-05, + "loss": 0.2243, + "step": 15263, + "teacher_loss": 0.21318133175373077 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.67814040184021, + "learning_rate": 1.9531882257194916e-05, + "loss": 0.3581, + "step": 15264, + "teacher_loss": 0.3225933611392975 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.8148816227912903, + "learning_rate": 1.9529717095173764e-05, + "loss": 0.5407, + "step": 15265, + "teacher_loss": 0.5102474689483643 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.5089905858039856, + "learning_rate": 1.9527551829299772e-05, + "loss": 0.4243, + "step": 15266, + "teacher_loss": 0.4148995280265808 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.4893893003463745, + "learning_rate": 1.9525386459622594e-05, + "loss": 0.236, + "step": 15267, + "teacher_loss": 0.2078860104084015 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.287700891494751, + "learning_rate": 1.9523220986191873e-05, + "loss": 0.2523, + "step": 15268, + "teacher_loss": 0.2484011948108673 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.8317673206329346, + "learning_rate": 1.9521055409057254e-05, + "loss": 0.2652, + "step": 15269, + "teacher_loss": 0.2022302746772766 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.2522299289703369, + "learning_rate": 1.9518889728268398e-05, + "loss": 0.1665, + "step": 15270, + "teacher_loss": 0.1569472700357437 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.25777074694633484, + "learning_rate": 1.9516723943874938e-05, + "loss": 0.1945, + "step": 15271, + "teacher_loss": 0.18752457201480865 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.3481416702270508, + "learning_rate": 1.9514558055926546e-05, + "loss": 0.2466, + "step": 15272, + "teacher_loss": 0.2353469431400299 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.5196369886398315, + "learning_rate": 1.951239206447287e-05, + "loss": 0.3199, + "step": 15273, + "teacher_loss": 0.29775428771972656 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.4133542776107788, + "learning_rate": 1.951022596956357e-05, + "loss": 0.2576, + "step": 15274, + "teacher_loss": 0.2402483969926834 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.36540108919143677, + "learning_rate": 1.9508059771248315e-05, + "loss": 0.4227, + "step": 15275, + "teacher_loss": 0.42908233404159546 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.4989544749259949, + "learning_rate": 1.9505893469576767e-05, + "loss": 0.2677, + "step": 15276, + "teacher_loss": 0.2420121282339096 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.3012455105781555, + "learning_rate": 1.9503727064598583e-05, + "loss": 0.3607, + "step": 15277, + "teacher_loss": 0.367296427488327 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 1.009514331817627, + "learning_rate": 1.950156055636345e-05, + "loss": 0.4326, + "step": 15278, + "teacher_loss": 0.36853593587875366 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.2650626301765442, + "learning_rate": 1.9499393944921027e-05, + "loss": 0.2065, + "step": 15279, + "teacher_loss": 0.20004716515541077 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.31511303782463074, + "learning_rate": 1.9497227230320992e-05, + "loss": 0.1859, + "step": 15280, + "teacher_loss": 0.1715787649154663 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.45767858624458313, + "learning_rate": 1.9495060412613018e-05, + "loss": 0.2633, + "step": 15281, + "teacher_loss": 0.24173401296138763 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.4147833585739136, + "learning_rate": 1.9492893491846783e-05, + "loss": 0.2652, + "step": 15282, + "teacher_loss": 0.2485392987728119 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.660057783126831, + "learning_rate": 1.9490726468071973e-05, + "loss": 0.3465, + "step": 15283, + "teacher_loss": 0.3116726279258728 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.3907315135002136, + "learning_rate": 1.9488559341338265e-05, + "loss": 0.1918, + "step": 15284, + "teacher_loss": 0.16965213418006897 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.5510426759719849, + "learning_rate": 1.9486392111695357e-05, + "loss": 0.2491, + "step": 15285, + "teacher_loss": 0.2155945748090744 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.9693059921264648, + "learning_rate": 1.9484224779192924e-05, + "loss": 0.3176, + "step": 15286, + "teacher_loss": 0.24523687362670898 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.3047083020210266, + "learning_rate": 1.9482057343880662e-05, + "loss": 0.3054, + "step": 15287, + "teacher_loss": 0.3055236041545868 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.3794265389442444, + "learning_rate": 1.9479889805808263e-05, + "loss": 0.2015, + "step": 15288, + "teacher_loss": 0.1817849725484848 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.3205123543739319, + "learning_rate": 1.9477722165025422e-05, + "loss": 0.1689, + "step": 15289, + "teacher_loss": 0.1520829051733017 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.41750961542129517, + "learning_rate": 1.9475554421581835e-05, + "loss": 0.2558, + "step": 15290, + "teacher_loss": 0.23785004019737244 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.5345646142959595, + "learning_rate": 1.9473386575527203e-05, + "loss": 0.2643, + "step": 15291, + "teacher_loss": 0.23427341878414154 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.8752279281616211, + "learning_rate": 1.947121862691123e-05, + "loss": 0.7775, + "step": 15292, + "teacher_loss": 0.76664799451828 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.4988873600959778, + "learning_rate": 1.9469050575783622e-05, + "loss": 0.3353, + "step": 15293, + "teacher_loss": 0.3171077370643616 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.9151477217674255, + "learning_rate": 1.9466882422194078e-05, + "loss": 0.4427, + "step": 15294, + "teacher_loss": 0.3902362585067749 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.2877733111381531, + "learning_rate": 1.9464714166192318e-05, + "loss": 0.1664, + "step": 15295, + "teacher_loss": 0.1529541164636612 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.2663087248802185, + "learning_rate": 1.9462545807828046e-05, + "loss": 0.1695, + "step": 15296, + "teacher_loss": 0.15874534845352173 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.5477350950241089, + "learning_rate": 1.946037734715098e-05, + "loss": 0.3764, + "step": 15297, + "teacher_loss": 0.3573461174964905 + }, + { + "compression_loss": 0.0, + "epoch": 2.76, + "label_loss": 0.6456408500671387, + "learning_rate": 1.945820878421083e-05, + "loss": 0.255, + "step": 15298, + "teacher_loss": 0.21157687902450562 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.9340426921844482, + "learning_rate": 1.945604011905732e-05, + "loss": 0.4878, + "step": 15299, + "teacher_loss": 0.4381910562515259 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.20558130741119385, + "learning_rate": 1.9453871351740173e-05, + "loss": 0.1843, + "step": 15300, + "teacher_loss": 0.18190276622772217 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.6753048896789551, + "learning_rate": 1.9451702482309106e-05, + "loss": 0.2425, + "step": 15301, + "teacher_loss": 0.19444096088409424 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.36088013648986816, + "learning_rate": 1.944953351081385e-05, + "loss": 0.2754, + "step": 15302, + "teacher_loss": 0.2658649682998657 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 1.4103915691375732, + "learning_rate": 1.944736443730413e-05, + "loss": 0.811, + "step": 15303, + "teacher_loss": 0.7443736791610718 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.31517595052719116, + "learning_rate": 1.9445195261829676e-05, + "loss": 0.181, + "step": 15304, + "teacher_loss": 0.1661023050546646 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.47994256019592285, + "learning_rate": 1.9443025984440223e-05, + "loss": 0.2559, + "step": 15305, + "teacher_loss": 0.23095327615737915 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.2898304760456085, + "learning_rate": 1.944085660518551e-05, + "loss": 0.2117, + "step": 15306, + "teacher_loss": 0.20302514731884003 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.33229899406433105, + "learning_rate": 1.9438687124115262e-05, + "loss": 0.2829, + "step": 15307, + "teacher_loss": 0.27739930152893066 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.46569719910621643, + "learning_rate": 1.943651754127923e-05, + "loss": 0.5578, + "step": 15308, + "teacher_loss": 0.5680338740348816 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.2508891820907593, + "learning_rate": 1.943434785672715e-05, + "loss": 0.2547, + "step": 15309, + "teacher_loss": 0.2550843358039856 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.4334692358970642, + "learning_rate": 1.943217807050877e-05, + "loss": 0.2678, + "step": 15310, + "teacher_loss": 0.24943788349628448 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.8599886894226074, + "learning_rate": 1.9430008182673836e-05, + "loss": 0.5846, + "step": 15311, + "teacher_loss": 0.5539662837982178 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.6390784978866577, + "learning_rate": 1.9427838193272096e-05, + "loss": 0.2868, + "step": 15312, + "teacher_loss": 0.24764171242713928 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.20647691190242767, + "learning_rate": 1.94256681023533e-05, + "loss": 0.2086, + "step": 15313, + "teacher_loss": 0.20881029963493347 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.13374051451683044, + "learning_rate": 1.9423497909967207e-05, + "loss": 0.1766, + "step": 15314, + "teacher_loss": 0.18137669563293457 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.4113527834415436, + "learning_rate": 1.9421327616163564e-05, + "loss": 0.2406, + "step": 15315, + "teacher_loss": 0.2216111123561859 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.5625593662261963, + "learning_rate": 1.9419157220992134e-05, + "loss": 0.3561, + "step": 15316, + "teacher_loss": 0.3331114649772644 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.69444739818573, + "learning_rate": 1.9416986724502685e-05, + "loss": 0.2253, + "step": 15317, + "teacher_loss": 0.17316468060016632 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.34339022636413574, + "learning_rate": 1.9414816126744968e-05, + "loss": 0.2026, + "step": 15318, + "teacher_loss": 0.18690776824951172 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.38562166690826416, + "learning_rate": 1.941264542776876e-05, + "loss": 0.2599, + "step": 15319, + "teacher_loss": 0.24588152766227722 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.5221768617630005, + "learning_rate": 1.9410474627623813e-05, + "loss": 0.2317, + "step": 15320, + "teacher_loss": 0.19938988983631134 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.5356217622756958, + "learning_rate": 1.940830372635991e-05, + "loss": 0.2818, + "step": 15321, + "teacher_loss": 0.2535601854324341 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.32742053270339966, + "learning_rate": 1.940613272402682e-05, + "loss": 0.217, + "step": 15322, + "teacher_loss": 0.20475628972053528 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.38767537474632263, + "learning_rate": 1.9403961620674318e-05, + "loss": 0.2005, + "step": 15323, + "teacher_loss": 0.17973214387893677 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.45740729570388794, + "learning_rate": 1.9401790416352175e-05, + "loss": 0.2768, + "step": 15324, + "teacher_loss": 0.2567288279533386 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.22176772356033325, + "learning_rate": 1.9399619111110182e-05, + "loss": 0.1659, + "step": 15325, + "teacher_loss": 0.15968795120716095 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.11045975238084793, + "learning_rate": 1.9397447704998106e-05, + "loss": 0.1375, + "step": 15326, + "teacher_loss": 0.14054521918296814 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.33058393001556396, + "learning_rate": 1.939527619806574e-05, + "loss": 0.2196, + "step": 15327, + "teacher_loss": 0.2073148936033249 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.33219119906425476, + "learning_rate": 1.9393104590362875e-05, + "loss": 0.2704, + "step": 15328, + "teacher_loss": 0.26353660225868225 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.22832022607326508, + "learning_rate": 1.939093288193929e-05, + "loss": 0.2118, + "step": 15329, + "teacher_loss": 0.2099297046661377 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.4364140033721924, + "learning_rate": 1.9388761072844776e-05, + "loss": 0.2729, + "step": 15330, + "teacher_loss": 0.2547116279602051 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.4045739769935608, + "learning_rate": 1.938658916312913e-05, + "loss": 0.2633, + "step": 15331, + "teacher_loss": 0.24755240976810455 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.510159969329834, + "learning_rate": 1.9384417152842142e-05, + "loss": 0.285, + "step": 15332, + "teacher_loss": 0.25997763872146606 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.6237562894821167, + "learning_rate": 1.9382245042033626e-05, + "loss": 0.3098, + "step": 15333, + "teacher_loss": 0.2749118506908417 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.119927778840065, + "learning_rate": 1.9380072830753358e-05, + "loss": 0.0954, + "step": 15334, + "teacher_loss": 0.09265954792499542 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.18138110637664795, + "learning_rate": 1.9377900519051158e-05, + "loss": 0.1797, + "step": 15335, + "teacher_loss": 0.17950746417045593 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.45671266317367554, + "learning_rate": 1.9375728106976824e-05, + "loss": 0.2856, + "step": 15336, + "teacher_loss": 0.2665832042694092 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.7529361248016357, + "learning_rate": 1.9373555594580158e-05, + "loss": 0.3408, + "step": 15337, + "teacher_loss": 0.294966459274292 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.3107438385486603, + "learning_rate": 1.937138298191098e-05, + "loss": 0.2642, + "step": 15338, + "teacher_loss": 0.2589777112007141 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.18152591586112976, + "learning_rate": 1.9369210269019095e-05, + "loss": 0.1591, + "step": 15339, + "teacher_loss": 0.15660011768341064 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.37157773971557617, + "learning_rate": 1.936703745595432e-05, + "loss": 0.3271, + "step": 15340, + "teacher_loss": 0.3221741318702698 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.37285345792770386, + "learning_rate": 1.936486454276647e-05, + "loss": 0.2483, + "step": 15341, + "teacher_loss": 0.23447629809379578 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.20179642736911774, + "learning_rate": 1.936269152950536e-05, + "loss": 0.1496, + "step": 15342, + "teacher_loss": 0.14383332431316376 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.6244723796844482, + "learning_rate": 1.936051841622081e-05, + "loss": 0.2874, + "step": 15343, + "teacher_loss": 0.2499159276485443 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.6533231735229492, + "learning_rate": 1.935834520296265e-05, + "loss": 0.4383, + "step": 15344, + "teacher_loss": 0.4144238829612732 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.3007589280605316, + "learning_rate": 1.93561718897807e-05, + "loss": 0.1902, + "step": 15345, + "teacher_loss": 0.17791253328323364 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.41964584589004517, + "learning_rate": 1.9353998476724796e-05, + "loss": 0.1869, + "step": 15346, + "teacher_loss": 0.16106031835079193 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.5858582258224487, + "learning_rate": 1.9351824963844753e-05, + "loss": 0.2894, + "step": 15347, + "teacher_loss": 0.25647345185279846 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.3030139207839966, + "learning_rate": 1.9349651351190415e-05, + "loss": 0.237, + "step": 15348, + "teacher_loss": 0.2297019064426422 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.48419642448425293, + "learning_rate": 1.934747763881161e-05, + "loss": 0.2392, + "step": 15349, + "teacher_loss": 0.21197223663330078 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.35033953189849854, + "learning_rate": 1.9345303826758178e-05, + "loss": 0.3075, + "step": 15350, + "teacher_loss": 0.30274152755737305 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.6534970998764038, + "learning_rate": 1.9343129915079956e-05, + "loss": 0.2609, + "step": 15351, + "teacher_loss": 0.21730080246925354 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.40793681144714355, + "learning_rate": 1.9340955903826788e-05, + "loss": 0.2294, + "step": 15352, + "teacher_loss": 0.20960572361946106 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.26840487122535706, + "learning_rate": 1.933878179304852e-05, + "loss": 0.2003, + "step": 15353, + "teacher_loss": 0.19271568953990936 + }, + { + "compression_loss": 0.0, + "epoch": 2.77, + "label_loss": 0.3471326231956482, + "learning_rate": 1.9336607582794985e-05, + "loss": 0.2656, + "step": 15354, + "teacher_loss": 0.25653257966041565 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.20372895896434784, + "learning_rate": 1.9334433273116046e-05, + "loss": 0.1942, + "step": 15355, + "teacher_loss": 0.19309498369693756 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.3926127552986145, + "learning_rate": 1.933225886406154e-05, + "loss": 0.1904, + "step": 15356, + "teacher_loss": 0.16796863079071045 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.712735652923584, + "learning_rate": 1.9330084355681335e-05, + "loss": 0.4207, + "step": 15357, + "teacher_loss": 0.38825154304504395 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.41693758964538574, + "learning_rate": 1.9327909748025278e-05, + "loss": 0.2528, + "step": 15358, + "teacher_loss": 0.23458613455295563 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.8446159362792969, + "learning_rate": 1.9325735041143222e-05, + "loss": 0.3504, + "step": 15359, + "teacher_loss": 0.2955010235309601 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.5601364374160767, + "learning_rate": 1.9323560235085033e-05, + "loss": 0.3274, + "step": 15360, + "teacher_loss": 0.3015804588794708 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.5117583274841309, + "learning_rate": 1.9321385329900573e-05, + "loss": 0.3487, + "step": 15361, + "teacher_loss": 0.3305990695953369 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.44725680351257324, + "learning_rate": 1.9319210325639696e-05, + "loss": 0.1836, + "step": 15362, + "teacher_loss": 0.1543216109275818 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.38868963718414307, + "learning_rate": 1.9317035222352285e-05, + "loss": 0.2469, + "step": 15363, + "teacher_loss": 0.2311720997095108 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.5383819341659546, + "learning_rate": 1.9314860020088194e-05, + "loss": 0.2819, + "step": 15364, + "teacher_loss": 0.25344693660736084 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.8332241177558899, + "learning_rate": 1.93126847188973e-05, + "loss": 0.3981, + "step": 15365, + "teacher_loss": 0.3497142195701599 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.24943889677524567, + "learning_rate": 1.931050931882948e-05, + "loss": 0.2349, + "step": 15366, + "teacher_loss": 0.23329699039459229 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.3653255105018616, + "learning_rate": 1.9308333819934598e-05, + "loss": 0.2133, + "step": 15367, + "teacher_loss": 0.1963883936405182 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.8866627216339111, + "learning_rate": 1.930615822226254e-05, + "loss": 0.4229, + "step": 15368, + "teacher_loss": 0.37142395973205566 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.7773796319961548, + "learning_rate": 1.9303982525863188e-05, + "loss": 0.3685, + "step": 15369, + "teacher_loss": 0.32303106784820557 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.446260005235672, + "learning_rate": 1.930180673078642e-05, + "loss": 0.3041, + "step": 15370, + "teacher_loss": 0.2882963716983795 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.33706748485565186, + "learning_rate": 1.929963083708212e-05, + "loss": 0.3445, + "step": 15371, + "teacher_loss": 0.3453790545463562 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.4106626510620117, + "learning_rate": 1.9297454844800172e-05, + "loss": 0.2056, + "step": 15372, + "teacher_loss": 0.1828521341085434 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.3490701913833618, + "learning_rate": 1.9295278753990475e-05, + "loss": 0.2546, + "step": 15373, + "teacher_loss": 0.2441122978925705 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.5082021355628967, + "learning_rate": 1.9293102564702912e-05, + "loss": 0.2587, + "step": 15374, + "teacher_loss": 0.23097503185272217 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.39774590730667114, + "learning_rate": 1.9290926276987373e-05, + "loss": 0.2017, + "step": 15375, + "teacher_loss": 0.17990842461585999 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.35292062163352966, + "learning_rate": 1.9288749890893768e-05, + "loss": 0.2353, + "step": 15376, + "teacher_loss": 0.22222284972667694 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.20118297636508942, + "learning_rate": 1.928657340647198e-05, + "loss": 0.2515, + "step": 15377, + "teacher_loss": 0.25704899430274963 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.5447788238525391, + "learning_rate": 1.9284396823771922e-05, + "loss": 0.2812, + "step": 15378, + "teacher_loss": 0.251919150352478 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.5870913863182068, + "learning_rate": 1.928222014284348e-05, + "loss": 0.4063, + "step": 15379, + "teacher_loss": 0.38619673252105713 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.7765418887138367, + "learning_rate": 1.928004336373658e-05, + "loss": 0.3454, + "step": 15380, + "teacher_loss": 0.2975092828273773 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.3485333025455475, + "learning_rate": 1.9277866486501113e-05, + "loss": 0.2375, + "step": 15381, + "teacher_loss": 0.22516947984695435 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.38803336024284363, + "learning_rate": 1.9275689511186995e-05, + "loss": 0.2028, + "step": 15382, + "teacher_loss": 0.18218934535980225 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.3977811932563782, + "learning_rate": 1.9273512437844135e-05, + "loss": 0.2817, + "step": 15383, + "teacher_loss": 0.26877161860466003 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.6130324602127075, + "learning_rate": 1.9271335266522443e-05, + "loss": 0.2424, + "step": 15384, + "teacher_loss": 0.20116689801216125 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.5464110374450684, + "learning_rate": 1.9269157997271846e-05, + "loss": 0.3315, + "step": 15385, + "teacher_loss": 0.30760204792022705 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.41381633281707764, + "learning_rate": 1.9266980630142252e-05, + "loss": 0.243, + "step": 15386, + "teacher_loss": 0.2240362912416458 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.2740339934825897, + "learning_rate": 1.9264803165183585e-05, + "loss": 0.3827, + "step": 15387, + "teacher_loss": 0.39475834369659424 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.21634894609451294, + "learning_rate": 1.9262625602445773e-05, + "loss": 0.2197, + "step": 15388, + "teacher_loss": 0.2201223373413086 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.19091299176216125, + "learning_rate": 1.926044794197873e-05, + "loss": 0.2231, + "step": 15389, + "teacher_loss": 0.22667405009269714 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.5715618133544922, + "learning_rate": 1.925827018383239e-05, + "loss": 0.2813, + "step": 15390, + "teacher_loss": 0.24906259775161743 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.15555621683597565, + "learning_rate": 1.9256092328056685e-05, + "loss": 0.2894, + "step": 15391, + "teacher_loss": 0.304284930229187 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.8207415342330933, + "learning_rate": 1.925391437470154e-05, + "loss": 0.5705, + "step": 15392, + "teacher_loss": 0.5427036285400391 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.142722025513649, + "learning_rate": 1.9251736323816897e-05, + "loss": 0.1684, + "step": 15393, + "teacher_loss": 0.1712525635957718 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.3283693790435791, + "learning_rate": 1.924955817545268e-05, + "loss": 0.2054, + "step": 15394, + "teacher_loss": 0.19175118207931519 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.20660507678985596, + "learning_rate": 1.924737992965884e-05, + "loss": 0.1957, + "step": 15395, + "teacher_loss": 0.19450122117996216 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.26444393396377563, + "learning_rate": 1.924520158648531e-05, + "loss": 0.2174, + "step": 15396, + "teacher_loss": 0.2121778130531311 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.22841104865074158, + "learning_rate": 1.9243023145982037e-05, + "loss": 0.2033, + "step": 15397, + "teacher_loss": 0.20048648118972778 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.29124128818511963, + "learning_rate": 1.9240844608198964e-05, + "loss": 0.1725, + "step": 15398, + "teacher_loss": 0.15928146243095398 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.7308986186981201, + "learning_rate": 1.9238665973186037e-05, + "loss": 0.299, + "step": 15399, + "teacher_loss": 0.2510484457015991 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.20163044333457947, + "learning_rate": 1.923648724099321e-05, + "loss": 0.1943, + "step": 15400, + "teacher_loss": 0.19347965717315674 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.5834843516349792, + "learning_rate": 1.9234308411670435e-05, + "loss": 0.2558, + "step": 15401, + "teacher_loss": 0.21933594346046448 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.62132328748703, + "learning_rate": 1.923212948526766e-05, + "loss": 0.3951, + "step": 15402, + "teacher_loss": 0.369960218667984 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.3575960397720337, + "learning_rate": 1.9229950461834845e-05, + "loss": 0.2526, + "step": 15403, + "teacher_loss": 0.24092787504196167 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.4675140380859375, + "learning_rate": 1.9227771341421945e-05, + "loss": 0.2513, + "step": 15404, + "teacher_loss": 0.2272489070892334 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.32540085911750793, + "learning_rate": 1.9225592124078925e-05, + "loss": 0.2055, + "step": 15405, + "teacher_loss": 0.19222715497016907 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.7232991456985474, + "learning_rate": 1.922341280985575e-05, + "loss": 0.4933, + "step": 15406, + "teacher_loss": 0.4677307605743408 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.27725711464881897, + "learning_rate": 1.922123339880238e-05, + "loss": 0.204, + "step": 15407, + "teacher_loss": 0.19587118923664093 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.6356396675109863, + "learning_rate": 1.9219053890968782e-05, + "loss": 0.4599, + "step": 15408, + "teacher_loss": 0.44042283296585083 + }, + { + "compression_loss": 0.0, + "epoch": 2.78, + "label_loss": 0.21995791792869568, + "learning_rate": 1.9216874286404927e-05, + "loss": 0.1643, + "step": 15409, + "teacher_loss": 0.15806464850902557 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.419097363948822, + "learning_rate": 1.921469458516079e-05, + "loss": 0.3189, + "step": 15410, + "teacher_loss": 0.3078019618988037 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.37716197967529297, + "learning_rate": 1.921251478728634e-05, + "loss": 0.3801, + "step": 15411, + "teacher_loss": 0.38041630387306213 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.6402702927589417, + "learning_rate": 1.9210334892831562e-05, + "loss": 0.3405, + "step": 15412, + "teacher_loss": 0.30720949172973633 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.3596067428588867, + "learning_rate": 1.9208154901846422e-05, + "loss": 0.2027, + "step": 15413, + "teacher_loss": 0.18524807691574097 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.6070290803909302, + "learning_rate": 1.9205974814380906e-05, + "loss": 0.374, + "step": 15414, + "teacher_loss": 0.348120778799057 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.3950555920600891, + "learning_rate": 1.9203794630485004e-05, + "loss": 0.2118, + "step": 15415, + "teacher_loss": 0.19141331315040588 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.4933011531829834, + "learning_rate": 1.920161435020869e-05, + "loss": 0.3456, + "step": 15416, + "teacher_loss": 0.32919132709503174 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.5695645809173584, + "learning_rate": 1.9199433973601955e-05, + "loss": 0.2451, + "step": 15417, + "teacher_loss": 0.20904628932476044 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.21928316354751587, + "learning_rate": 1.9197253500714797e-05, + "loss": 0.2482, + "step": 15418, + "teacher_loss": 0.2514092028141022 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.7365667819976807, + "learning_rate": 1.919507293159719e-05, + "loss": 0.4647, + "step": 15419, + "teacher_loss": 0.4345453381538391 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.5829306244850159, + "learning_rate": 1.9192892266299144e-05, + "loss": 0.2626, + "step": 15420, + "teacher_loss": 0.2270369678735733 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.6354122757911682, + "learning_rate": 1.919071150487065e-05, + "loss": 0.28, + "step": 15421, + "teacher_loss": 0.24051980674266815 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.2125316858291626, + "learning_rate": 1.9188530647361704e-05, + "loss": 0.2635, + "step": 15422, + "teacher_loss": 0.2692033052444458 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.4536246657371521, + "learning_rate": 1.9186349693822312e-05, + "loss": 0.2328, + "step": 15423, + "teacher_loss": 0.20824947953224182 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.6706361174583435, + "learning_rate": 1.9184168644302466e-05, + "loss": 0.25, + "step": 15424, + "teacher_loss": 0.20325596630573273 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.5530021786689758, + "learning_rate": 1.9181987498852175e-05, + "loss": 0.3416, + "step": 15425, + "teacher_loss": 0.318141371011734 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.15645934641361237, + "learning_rate": 1.917980625752146e-05, + "loss": 0.2224, + "step": 15426, + "teacher_loss": 0.22971150279045105 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.7199164628982544, + "learning_rate": 1.917762492036031e-05, + "loss": 0.4066, + "step": 15427, + "teacher_loss": 0.37179577350616455 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.4001297950744629, + "learning_rate": 1.9175443487418754e-05, + "loss": 0.1977, + "step": 15428, + "teacher_loss": 0.17515522241592407 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.9426354169845581, + "learning_rate": 1.9173261958746793e-05, + "loss": 0.3102, + "step": 15429, + "teacher_loss": 0.23995313048362732 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.5005701184272766, + "learning_rate": 1.9171080334394444e-05, + "loss": 0.2994, + "step": 15430, + "teacher_loss": 0.2769980728626251 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.21406710147857666, + "learning_rate": 1.9168898614411733e-05, + "loss": 0.2847, + "step": 15431, + "teacher_loss": 0.29255950450897217 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.4387319087982178, + "learning_rate": 1.9166716798848676e-05, + "loss": 0.2175, + "step": 15432, + "teacher_loss": 0.19287964701652527 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.4194740355014801, + "learning_rate": 1.9164534887755292e-05, + "loss": 0.2878, + "step": 15433, + "teacher_loss": 0.2731245756149292 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.3970203399658203, + "learning_rate": 1.916235288118161e-05, + "loss": 0.2829, + "step": 15434, + "teacher_loss": 0.2701733112335205 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.7173104882240295, + "learning_rate": 1.9160170779177657e-05, + "loss": 0.4083, + "step": 15435, + "teacher_loss": 0.3739841878414154 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.5822244882583618, + "learning_rate": 1.9157988581793463e-05, + "loss": 0.3525, + "step": 15436, + "teacher_loss": 0.32701003551483154 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.9780691862106323, + "learning_rate": 1.9155806289079053e-05, + "loss": 0.3841, + "step": 15437, + "teacher_loss": 0.31810736656188965 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.5977356433868408, + "learning_rate": 1.915362390108447e-05, + "loss": 0.3182, + "step": 15438, + "teacher_loss": 0.28715863823890686 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.48312854766845703, + "learning_rate": 1.9151441417859733e-05, + "loss": 0.2482, + "step": 15439, + "teacher_loss": 0.22208517789840698 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.4813908338546753, + "learning_rate": 1.91492588394549e-05, + "loss": 0.3248, + "step": 15440, + "teacher_loss": 0.30738508701324463 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.14062903821468353, + "learning_rate": 1.914707616592e-05, + "loss": 0.1747, + "step": 15441, + "teacher_loss": 0.17854070663452148 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.18599435687065125, + "learning_rate": 1.9144893397305077e-05, + "loss": 0.2603, + "step": 15442, + "teacher_loss": 0.2685200572013855 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.38677364587783813, + "learning_rate": 1.914271053366018e-05, + "loss": 0.2403, + "step": 15443, + "teacher_loss": 0.22402793169021606 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.4892314076423645, + "learning_rate": 1.914052757503534e-05, + "loss": 0.2413, + "step": 15444, + "teacher_loss": 0.2137695997953415 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.3936445116996765, + "learning_rate": 1.9138344521480628e-05, + "loss": 0.1686, + "step": 15445, + "teacher_loss": 0.14355415105819702 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.35831132531166077, + "learning_rate": 1.913616137304608e-05, + "loss": 0.2937, + "step": 15446, + "teacher_loss": 0.2865021824836731 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.9481849074363708, + "learning_rate": 1.9133978129781748e-05, + "loss": 0.351, + "step": 15447, + "teacher_loss": 0.28465744853019714 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.48017382621765137, + "learning_rate": 1.9131794791737704e-05, + "loss": 0.2008, + "step": 15448, + "teacher_loss": 0.16977854073047638 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.16602830588817596, + "learning_rate": 1.9129611358963978e-05, + "loss": 0.1897, + "step": 15449, + "teacher_loss": 0.1923007369041443 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.45903316140174866, + "learning_rate": 1.912742783151065e-05, + "loss": 0.2485, + "step": 15450, + "teacher_loss": 0.2251119613647461 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.5154883861541748, + "learning_rate": 1.912524420942778e-05, + "loss": 0.3216, + "step": 15451, + "teacher_loss": 0.300012469291687 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.548113226890564, + "learning_rate": 1.9123060492765422e-05, + "loss": 0.4535, + "step": 15452, + "teacher_loss": 0.442943274974823 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.3259221911430359, + "learning_rate": 1.9120876681573656e-05, + "loss": 0.2872, + "step": 15453, + "teacher_loss": 0.28293758630752563 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.3390854001045227, + "learning_rate": 1.911869277590254e-05, + "loss": 0.3488, + "step": 15454, + "teacher_loss": 0.3499288260936737 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.5569580793380737, + "learning_rate": 1.9116508775802143e-05, + "loss": 0.2861, + "step": 15455, + "teacher_loss": 0.2560235857963562 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.371101438999176, + "learning_rate": 1.911432468132255e-05, + "loss": 0.3239, + "step": 15456, + "teacher_loss": 0.31864434480667114 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.7027199268341064, + "learning_rate": 1.911214049251382e-05, + "loss": 0.2799, + "step": 15457, + "teacher_loss": 0.23296645283699036 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.5315957069396973, + "learning_rate": 1.910995620942604e-05, + "loss": 0.2437, + "step": 15458, + "teacher_loss": 0.21166233718395233 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.7411613464355469, + "learning_rate": 1.9107771832109288e-05, + "loss": 0.697, + "step": 15459, + "teacher_loss": 0.6921473741531372 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.5061233043670654, + "learning_rate": 1.9105587360613642e-05, + "loss": 0.2554, + "step": 15460, + "teacher_loss": 0.2275104820728302 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.8083809614181519, + "learning_rate": 1.9103402794989186e-05, + "loss": 0.3693, + "step": 15461, + "teacher_loss": 0.32046806812286377 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.3506333529949188, + "learning_rate": 1.9101218135286007e-05, + "loss": 0.1816, + "step": 15462, + "teacher_loss": 0.16281311213970184 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.45168614387512207, + "learning_rate": 1.9099033381554192e-05, + "loss": 0.2991, + "step": 15463, + "teacher_loss": 0.28219398856163025 + }, + { + "compression_loss": 0.0, + "epoch": 2.79, + "label_loss": 0.6518122553825378, + "learning_rate": 1.9096848533843834e-05, + "loss": 0.2654, + "step": 15464, + "teacher_loss": 0.22241343557834625 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.20811881124973297, + "learning_rate": 1.9094663592205017e-05, + "loss": 0.208, + "step": 15465, + "teacher_loss": 0.20794963836669922 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.3877553343772888, + "learning_rate": 1.9092478556687848e-05, + "loss": 0.315, + "step": 15466, + "teacher_loss": 0.30690741539001465 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.3082616329193115, + "learning_rate": 1.9090293427342406e-05, + "loss": 0.2151, + "step": 15467, + "teacher_loss": 0.20477703213691711 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.45852968096733093, + "learning_rate": 1.908810820421881e-05, + "loss": 0.386, + "step": 15468, + "teacher_loss": 0.3779807686805725 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.411659836769104, + "learning_rate": 1.9085922887367138e-05, + "loss": 0.2071, + "step": 15469, + "teacher_loss": 0.18435998260974884 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.46716147661209106, + "learning_rate": 1.9083737476837512e-05, + "loss": 0.281, + "step": 15470, + "teacher_loss": 0.2603006064891815 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.5763517022132874, + "learning_rate": 1.9081551972680025e-05, + "loss": 0.3504, + "step": 15471, + "teacher_loss": 0.32532578706741333 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.22218668460845947, + "learning_rate": 1.907936637494479e-05, + "loss": 0.1967, + "step": 15472, + "teacher_loss": 0.19387857615947723 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.24990811944007874, + "learning_rate": 1.9077180683681914e-05, + "loss": 0.286, + "step": 15473, + "teacher_loss": 0.2900213301181793 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.43082424998283386, + "learning_rate": 1.907499489894151e-05, + "loss": 0.2383, + "step": 15474, + "teacher_loss": 0.21685364842414856 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.5592328906059265, + "learning_rate": 1.9072809020773696e-05, + "loss": 0.2134, + "step": 15475, + "teacher_loss": 0.1749754548072815 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.7293300032615662, + "learning_rate": 1.9070623049228575e-05, + "loss": 0.2904, + "step": 15476, + "teacher_loss": 0.24163228273391724 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.6213291883468628, + "learning_rate": 1.9068436984356274e-05, + "loss": 0.2533, + "step": 15477, + "teacher_loss": 0.21244600415229797 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.5540156960487366, + "learning_rate": 1.906625082620691e-05, + "loss": 0.2158, + "step": 15478, + "teacher_loss": 0.1782737523317337 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.1648986041545868, + "learning_rate": 1.9064064574830606e-05, + "loss": 0.2153, + "step": 15479, + "teacher_loss": 0.22089609503746033 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.23908287286758423, + "learning_rate": 1.9061878230277485e-05, + "loss": 0.2692, + "step": 15480, + "teacher_loss": 0.272597074508667 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 1.263882040977478, + "learning_rate": 1.905969179259768e-05, + "loss": 0.3981, + "step": 15481, + "teacher_loss": 0.30194544792175293 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.1613868921995163, + "learning_rate": 1.9057505261841305e-05, + "loss": 0.212, + "step": 15482, + "teacher_loss": 0.21761903166770935 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.20352189242839813, + "learning_rate": 1.9055318638058504e-05, + "loss": 0.2041, + "step": 15483, + "teacher_loss": 0.20414231717586517 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.2667786180973053, + "learning_rate": 1.9053131921299404e-05, + "loss": 0.1904, + "step": 15484, + "teacher_loss": 0.18195980787277222 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 1.0054373741149902, + "learning_rate": 1.9050945111614142e-05, + "loss": 0.2998, + "step": 15485, + "teacher_loss": 0.2213945835828781 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.17990493774414062, + "learning_rate": 1.9048758209052856e-05, + "loss": 0.2183, + "step": 15486, + "teacher_loss": 0.22258487343788147 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.681825578212738, + "learning_rate": 1.904657121366568e-05, + "loss": 0.348, + "step": 15487, + "teacher_loss": 0.31090688705444336 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.578462541103363, + "learning_rate": 1.904438412550276e-05, + "loss": 0.2308, + "step": 15488, + "teacher_loss": 0.19216594099998474 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.44593191146850586, + "learning_rate": 1.9042196944614234e-05, + "loss": 0.2323, + "step": 15489, + "teacher_loss": 0.2085266411304474 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.42436695098876953, + "learning_rate": 1.9040009671050253e-05, + "loss": 0.3933, + "step": 15490, + "teacher_loss": 0.389850914478302 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.2601102888584137, + "learning_rate": 1.9037822304860967e-05, + "loss": 0.2496, + "step": 15491, + "teacher_loss": 0.24843838810920715 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.37709468603134155, + "learning_rate": 1.903563484609652e-05, + "loss": 0.2585, + "step": 15492, + "teacher_loss": 0.24529266357421875 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.7521003484725952, + "learning_rate": 1.903344729480706e-05, + "loss": 0.4214, + "step": 15493, + "teacher_loss": 0.3846304416656494 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.525193452835083, + "learning_rate": 1.903125965104275e-05, + "loss": 0.366, + "step": 15494, + "teacher_loss": 0.3483607769012451 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.232259601354599, + "learning_rate": 1.902907191485374e-05, + "loss": 0.207, + "step": 15495, + "teacher_loss": 0.2042347490787506 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.46593573689460754, + "learning_rate": 1.9026884086290196e-05, + "loss": 0.1987, + "step": 15496, + "teacher_loss": 0.1689864844083786 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.47424525022506714, + "learning_rate": 1.9024696165402272e-05, + "loss": 0.3479, + "step": 15497, + "teacher_loss": 0.3339027762413025 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.1579851508140564, + "learning_rate": 1.902250815224013e-05, + "loss": 0.2085, + "step": 15498, + "teacher_loss": 0.2141035944223404 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.3872631788253784, + "learning_rate": 1.9020320046853935e-05, + "loss": 0.283, + "step": 15499, + "teacher_loss": 0.2714151442050934 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.6938329935073853, + "learning_rate": 1.9018131849293856e-05, + "loss": 0.2964, + "step": 15500, + "teacher_loss": 0.2521893084049225 + }, + { + "epoch": 2.8, + "eval_exact_match": 80.08514664143803, + "eval_f1": 87.3458180711934, + "step": 15500 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.32541730999946594, + "learning_rate": 1.9015943559610063e-05, + "loss": 0.334, + "step": 15501, + "teacher_loss": 0.33500736951828003 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.5618366003036499, + "learning_rate": 1.901375517785272e-05, + "loss": 0.294, + "step": 15502, + "teacher_loss": 0.2642078995704651 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.7149922251701355, + "learning_rate": 1.9011566704072007e-05, + "loss": 0.372, + "step": 15503, + "teacher_loss": 0.3338811993598938 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 1.0027284622192383, + "learning_rate": 1.90093781383181e-05, + "loss": 0.6309, + "step": 15504, + "teacher_loss": 0.5895869731903076 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.3709099590778351, + "learning_rate": 1.9007189480641168e-05, + "loss": 0.303, + "step": 15505, + "teacher_loss": 0.2954355776309967 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.7155470252037048, + "learning_rate": 1.90050007310914e-05, + "loss": 0.3456, + "step": 15506, + "teacher_loss": 0.3044409453868866 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.28512662649154663, + "learning_rate": 1.9002811889718966e-05, + "loss": 0.2266, + "step": 15507, + "teacher_loss": 0.22005261480808258 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.2743754982948303, + "learning_rate": 1.9000622956574063e-05, + "loss": 0.2676, + "step": 15508, + "teacher_loss": 0.266897976398468 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.36258500814437866, + "learning_rate": 1.8998433931706868e-05, + "loss": 0.213, + "step": 15509, + "teacher_loss": 0.19641447067260742 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 1.0479073524475098, + "learning_rate": 1.8996244815167568e-05, + "loss": 1.0109, + "step": 15510, + "teacher_loss": 1.0067338943481445 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.7324337363243103, + "learning_rate": 1.8994055607006363e-05, + "loss": 0.2663, + "step": 15511, + "teacher_loss": 0.21455280482769012 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.2905508279800415, + "learning_rate": 1.899186630727343e-05, + "loss": 0.1903, + "step": 15512, + "teacher_loss": 0.17915646731853485 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.4828190207481384, + "learning_rate": 1.8989676916018976e-05, + "loss": 0.218, + "step": 15513, + "teacher_loss": 0.18862539529800415 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.2640182077884674, + "learning_rate": 1.898748743329319e-05, + "loss": 0.1846, + "step": 15514, + "teacher_loss": 0.17574471235275269 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.17918409407138824, + "learning_rate": 1.898529785914627e-05, + "loss": 0.3063, + "step": 15515, + "teacher_loss": 0.32046833634376526 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.29043838381767273, + "learning_rate": 1.8983108193628425e-05, + "loss": 0.2327, + "step": 15516, + "teacher_loss": 0.22626343369483948 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.44323647022247314, + "learning_rate": 1.898091843678984e-05, + "loss": 0.2247, + "step": 15517, + "teacher_loss": 0.2003874033689499 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.5753565430641174, + "learning_rate": 1.8978728588680744e-05, + "loss": 0.2813, + "step": 15518, + "teacher_loss": 0.24860483407974243 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.5129090547561646, + "learning_rate": 1.897653864935132e-05, + "loss": 0.2844, + "step": 15519, + "teacher_loss": 0.2589606046676636 + }, + { + "compression_loss": 0.0, + "epoch": 2.8, + "label_loss": 0.3188718557357788, + "learning_rate": 1.897434861885179e-05, + "loss": 0.2179, + "step": 15520, + "teacher_loss": 0.2066279500722885 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.35984236001968384, + "learning_rate": 1.8972158497232365e-05, + "loss": 0.2603, + "step": 15521, + "teacher_loss": 0.24920672178268433 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.264047235250473, + "learning_rate": 1.896996828454325e-05, + "loss": 0.1933, + "step": 15522, + "teacher_loss": 0.18547002971172333 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.4257519543170929, + "learning_rate": 1.8967777980834668e-05, + "loss": 0.2409, + "step": 15523, + "teacher_loss": 0.22037647664546967 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.16302679479122162, + "learning_rate": 1.8965587586156833e-05, + "loss": 0.1563, + "step": 15524, + "teacher_loss": 0.15558823943138123 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.4739953279495239, + "learning_rate": 1.8963397100559965e-05, + "loss": 0.3629, + "step": 15525, + "teacher_loss": 0.35060369968414307 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.33425238728523254, + "learning_rate": 1.8961206524094284e-05, + "loss": 0.3309, + "step": 15526, + "teacher_loss": 0.33055824041366577 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.3556947112083435, + "learning_rate": 1.895901585681001e-05, + "loss": 0.1528, + "step": 15527, + "teacher_loss": 0.13030007481575012 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.22201834619045258, + "learning_rate": 1.8956825098757377e-05, + "loss": 0.2478, + "step": 15528, + "teacher_loss": 0.2506236135959625 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.2079494446516037, + "learning_rate": 1.8954634249986602e-05, + "loss": 0.184, + "step": 15529, + "teacher_loss": 0.18128818273544312 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.2944962680339813, + "learning_rate": 1.8952443310547926e-05, + "loss": 0.2676, + "step": 15530, + "teacher_loss": 0.2645842432975769 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.1734279841184616, + "learning_rate": 1.895025228049157e-05, + "loss": 0.2391, + "step": 15531, + "teacher_loss": 0.24639226496219635 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.6183395981788635, + "learning_rate": 1.8948061159867774e-05, + "loss": 0.3579, + "step": 15532, + "teacher_loss": 0.32891008257865906 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.507132887840271, + "learning_rate": 1.8945869948726774e-05, + "loss": 0.2778, + "step": 15533, + "teacher_loss": 0.2522915005683899 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.6699087619781494, + "learning_rate": 1.894367864711881e-05, + "loss": 0.3231, + "step": 15534, + "teacher_loss": 0.2845231294631958 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.41367071866989136, + "learning_rate": 1.8941487255094112e-05, + "loss": 0.2485, + "step": 15535, + "teacher_loss": 0.23012655973434448 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.7437030076980591, + "learning_rate": 1.8939295772702933e-05, + "loss": 0.2718, + "step": 15536, + "teacher_loss": 0.21932527422904968 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.37080633640289307, + "learning_rate": 1.893710419999551e-05, + "loss": 0.2116, + "step": 15537, + "teacher_loss": 0.19390274584293365 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.42633703351020813, + "learning_rate": 1.8934912537022094e-05, + "loss": 0.1711, + "step": 15538, + "teacher_loss": 0.14274394512176514 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.37351471185684204, + "learning_rate": 1.8932720783832926e-05, + "loss": 0.2574, + "step": 15539, + "teacher_loss": 0.24454814195632935 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.2345258593559265, + "learning_rate": 1.8930528940478263e-05, + "loss": 0.1896, + "step": 15540, + "teacher_loss": 0.18465213477611542 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.6827257871627808, + "learning_rate": 1.892833700700836e-05, + "loss": 0.2493, + "step": 15541, + "teacher_loss": 0.20110656321048737 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.4447067379951477, + "learning_rate": 1.8926144983473464e-05, + "loss": 0.2693, + "step": 15542, + "teacher_loss": 0.24985337257385254 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.21103844046592712, + "learning_rate": 1.8923952869923836e-05, + "loss": 0.2143, + "step": 15543, + "teacher_loss": 0.21465227007865906 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.2783176898956299, + "learning_rate": 1.8921760666409734e-05, + "loss": 0.2716, + "step": 15544, + "teacher_loss": 0.2708452343940735 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.8818731307983398, + "learning_rate": 1.891956837298141e-05, + "loss": 0.8136, + "step": 15545, + "teacher_loss": 0.8059999942779541 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.4194653630256653, + "learning_rate": 1.8917375989689146e-05, + "loss": 0.2238, + "step": 15546, + "teacher_loss": 0.20201988518238068 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.9790559411048889, + "learning_rate": 1.8915183516583194e-05, + "loss": 0.5023, + "step": 15547, + "teacher_loss": 0.44937628507614136 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.2915409803390503, + "learning_rate": 1.8912990953713812e-05, + "loss": 0.195, + "step": 15548, + "teacher_loss": 0.18431204557418823 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.42722997069358826, + "learning_rate": 1.891079830113129e-05, + "loss": 0.3306, + "step": 15549, + "teacher_loss": 0.319845050573349 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.42971712350845337, + "learning_rate": 1.8908605558885882e-05, + "loss": 0.2558, + "step": 15550, + "teacher_loss": 0.2364306002855301 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.4738118350505829, + "learning_rate": 1.8906412727027873e-05, + "loss": 0.2865, + "step": 15551, + "teacher_loss": 0.2657358646392822 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.34051600098609924, + "learning_rate": 1.8904219805607527e-05, + "loss": 0.1948, + "step": 15552, + "teacher_loss": 0.17857897281646729 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.38116997480392456, + "learning_rate": 1.8902026794675124e-05, + "loss": 0.2724, + "step": 15553, + "teacher_loss": 0.2602729797363281 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.5266185402870178, + "learning_rate": 1.8899833694280952e-05, + "loss": 0.2993, + "step": 15554, + "teacher_loss": 0.27399206161499023 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.3713909685611725, + "learning_rate": 1.8897640504475283e-05, + "loss": 0.2725, + "step": 15555, + "teacher_loss": 0.26149219274520874 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.4412344694137573, + "learning_rate": 1.8895447225308403e-05, + "loss": 0.2516, + "step": 15556, + "teacher_loss": 0.2305634617805481 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.6124913692474365, + "learning_rate": 1.8893253856830597e-05, + "loss": 0.409, + "step": 15557, + "teacher_loss": 0.3864133059978485 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 1.1241494417190552, + "learning_rate": 1.8891060399092153e-05, + "loss": 0.2867, + "step": 15558, + "teacher_loss": 0.193625807762146 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.42997950315475464, + "learning_rate": 1.8888866852143363e-05, + "loss": 0.3795, + "step": 15559, + "teacher_loss": 0.37392929196357727 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.39414238929748535, + "learning_rate": 1.8886673216034513e-05, + "loss": 0.1887, + "step": 15560, + "teacher_loss": 0.16592717170715332 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.43040138483047485, + "learning_rate": 1.88844794908159e-05, + "loss": 0.2094, + "step": 15561, + "teacher_loss": 0.18483954668045044 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.3976280689239502, + "learning_rate": 1.888228567653781e-05, + "loss": 0.2363, + "step": 15562, + "teacher_loss": 0.21836315095424652 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.38169950246810913, + "learning_rate": 1.8880091773250558e-05, + "loss": 0.2478, + "step": 15563, + "teacher_loss": 0.23293665051460266 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.3617252707481384, + "learning_rate": 1.8877897781004435e-05, + "loss": 0.2197, + "step": 15564, + "teacher_loss": 0.2039305865764618 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 1.1804354190826416, + "learning_rate": 1.887570369984974e-05, + "loss": 0.2984, + "step": 15565, + "teacher_loss": 0.20037254691123962 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.5848848819732666, + "learning_rate": 1.887350952983678e-05, + "loss": 0.2582, + "step": 15566, + "teacher_loss": 0.2219366729259491 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.4347054660320282, + "learning_rate": 1.8871315271015857e-05, + "loss": 0.228, + "step": 15567, + "teacher_loss": 0.20500747859477997 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.419778048992157, + "learning_rate": 1.8869120923437284e-05, + "loss": 0.2792, + "step": 15568, + "teacher_loss": 0.2636294960975647 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.2278396487236023, + "learning_rate": 1.8866926487151374e-05, + "loss": 0.1876, + "step": 15569, + "teacher_loss": 0.18314608931541443 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.8811348676681519, + "learning_rate": 1.8864731962208422e-05, + "loss": 0.3872, + "step": 15570, + "teacher_loss": 0.3323003649711609 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.5627462267875671, + "learning_rate": 1.8862537348658764e-05, + "loss": 0.3403, + "step": 15571, + "teacher_loss": 0.31559625267982483 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.5714628100395203, + "learning_rate": 1.8860342646552698e-05, + "loss": 0.2617, + "step": 15572, + "teacher_loss": 0.22725656628608704 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.3889523148536682, + "learning_rate": 1.8858147855940546e-05, + "loss": 0.2418, + "step": 15573, + "teacher_loss": 0.22547374665737152 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.44961076974868774, + "learning_rate": 1.885595297687264e-05, + "loss": 0.2337, + "step": 15574, + "teacher_loss": 0.20969700813293457 + }, + { + "compression_loss": 0.0, + "epoch": 2.81, + "label_loss": 0.5013306736946106, + "learning_rate": 1.8853758009399288e-05, + "loss": 0.3811, + "step": 15575, + "teacher_loss": 0.3677855134010315 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.47343164682388306, + "learning_rate": 1.8851562953570824e-05, + "loss": 0.2834, + "step": 15576, + "teacher_loss": 0.2622416615486145 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.6585253477096558, + "learning_rate": 1.8849367809437562e-05, + "loss": 0.5735, + "step": 15577, + "teacher_loss": 0.5640000104904175 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.2611042857170105, + "learning_rate": 1.8847172577049837e-05, + "loss": 0.1916, + "step": 15578, + "teacher_loss": 0.18387891352176666 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.36949077248573303, + "learning_rate": 1.8844977256457985e-05, + "loss": 0.252, + "step": 15579, + "teacher_loss": 0.23896434903144836 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.5243006944656372, + "learning_rate": 1.884278184771233e-05, + "loss": 0.2923, + "step": 15580, + "teacher_loss": 0.2665550112724304 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.4477646052837372, + "learning_rate": 1.8840586350863207e-05, + "loss": 0.2392, + "step": 15581, + "teacher_loss": 0.21598154306411743 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.17496567964553833, + "learning_rate": 1.8838390765960956e-05, + "loss": 0.1921, + "step": 15582, + "teacher_loss": 0.19396603107452393 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.8789926171302795, + "learning_rate": 1.883619509305591e-05, + "loss": 0.4427, + "step": 15583, + "teacher_loss": 0.3941769003868103 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.2837710976600647, + "learning_rate": 1.8833999332198418e-05, + "loss": 0.2505, + "step": 15584, + "teacher_loss": 0.24682967364788055 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 1.0828001499176025, + "learning_rate": 1.883180348343881e-05, + "loss": 0.3935, + "step": 15585, + "teacher_loss": 0.3168558180332184 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.34016817808151245, + "learning_rate": 1.8829607546827438e-05, + "loss": 0.2511, + "step": 15586, + "teacher_loss": 0.24124349653720856 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.45018357038497925, + "learning_rate": 1.8827411522414647e-05, + "loss": 0.3647, + "step": 15587, + "teacher_loss": 0.35515522956848145 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.2050551474094391, + "learning_rate": 1.8825215410250784e-05, + "loss": 0.2142, + "step": 15588, + "teacher_loss": 0.21523958444595337 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.28036320209503174, + "learning_rate": 1.8823019210386204e-05, + "loss": 0.1926, + "step": 15589, + "teacher_loss": 0.1828005313873291 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.4579232335090637, + "learning_rate": 1.8820822922871254e-05, + "loss": 0.2495, + "step": 15590, + "teacher_loss": 0.22639469802379608 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.20725765824317932, + "learning_rate": 1.881862654775629e-05, + "loss": 0.1704, + "step": 15591, + "teacher_loss": 0.16634106636047363 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.33307963609695435, + "learning_rate": 1.8816430085091663e-05, + "loss": 0.2136, + "step": 15592, + "teacher_loss": 0.20032566785812378 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.32874059677124023, + "learning_rate": 1.881423353492774e-05, + "loss": 0.243, + "step": 15593, + "teacher_loss": 0.2335001826286316 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.9663118124008179, + "learning_rate": 1.881203689731488e-05, + "loss": 0.3227, + "step": 15594, + "teacher_loss": 0.2512326240539551 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.4488135874271393, + "learning_rate": 1.8809840172303435e-05, + "loss": 0.2905, + "step": 15595, + "teacher_loss": 0.272901713848114 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.46793121099472046, + "learning_rate": 1.8807643359943788e-05, + "loss": 0.3281, + "step": 15596, + "teacher_loss": 0.31253546476364136 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.5057793855667114, + "learning_rate": 1.880544646028629e-05, + "loss": 0.4708, + "step": 15597, + "teacher_loss": 0.4669633209705353 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.45457014441490173, + "learning_rate": 1.880324947338131e-05, + "loss": 0.2302, + "step": 15598, + "teacher_loss": 0.20530733466148376 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.37621429562568665, + "learning_rate": 1.880105239927923e-05, + "loss": 0.233, + "step": 15599, + "teacher_loss": 0.21705016493797302 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.7735012173652649, + "learning_rate": 1.879885523803041e-05, + "loss": 0.2609, + "step": 15600, + "teacher_loss": 0.20392854511737823 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.4213787913322449, + "learning_rate": 1.879665798968523e-05, + "loss": 0.222, + "step": 15601, + "teacher_loss": 0.1998279094696045 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.2106642723083496, + "learning_rate": 1.8794460654294064e-05, + "loss": 0.2121, + "step": 15602, + "teacher_loss": 0.21228326857089996 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.9285902976989746, + "learning_rate": 1.8792263231907292e-05, + "loss": 0.3901, + "step": 15603, + "teacher_loss": 0.3303107023239136 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.3709350824356079, + "learning_rate": 1.87900657225753e-05, + "loss": 0.2995, + "step": 15604, + "teacher_loss": 0.29150816798210144 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.24274086952209473, + "learning_rate": 1.8787868126348455e-05, + "loss": 0.2068, + "step": 15605, + "teacher_loss": 0.20277884602546692 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.3534519672393799, + "learning_rate": 1.8785670443277156e-05, + "loss": 0.2386, + "step": 15606, + "teacher_loss": 0.22580553591251373 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.4775151014328003, + "learning_rate": 1.878347267341178e-05, + "loss": 0.2302, + "step": 15607, + "teacher_loss": 0.20275001227855682 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.44628381729125977, + "learning_rate": 1.8781274816802722e-05, + "loss": 0.2749, + "step": 15608, + "teacher_loss": 0.25584763288497925 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.7155665159225464, + "learning_rate": 1.877907687350037e-05, + "loss": 0.2676, + "step": 15609, + "teacher_loss": 0.21778368949890137 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.9141194224357605, + "learning_rate": 1.8776878843555114e-05, + "loss": 0.39, + "step": 15610, + "teacher_loss": 0.3317718505859375 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.5735787153244019, + "learning_rate": 1.877468072701735e-05, + "loss": 0.5461, + "step": 15611, + "teacher_loss": 0.5430393218994141 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.28447479009628296, + "learning_rate": 1.8772482523937475e-05, + "loss": 0.1939, + "step": 15612, + "teacher_loss": 0.18388940393924713 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.6083656549453735, + "learning_rate": 1.8770284234365883e-05, + "loss": 0.2487, + "step": 15613, + "teacher_loss": 0.20877036452293396 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.6459749937057495, + "learning_rate": 1.876808585835298e-05, + "loss": 0.2735, + "step": 15614, + "teacher_loss": 0.23210833966732025 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.37882575392723083, + "learning_rate": 1.876588739594916e-05, + "loss": 0.2116, + "step": 15615, + "teacher_loss": 0.19300782680511475 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.6242225170135498, + "learning_rate": 1.8763688847204843e-05, + "loss": 0.3597, + "step": 15616, + "teacher_loss": 0.3303227126598358 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.48707103729248047, + "learning_rate": 1.8761490212170416e-05, + "loss": 0.334, + "step": 15617, + "teacher_loss": 0.31697624921798706 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.48537909984588623, + "learning_rate": 1.87592914908963e-05, + "loss": 0.2359, + "step": 15618, + "teacher_loss": 0.20818506181240082 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.5380996465682983, + "learning_rate": 1.8757092683432903e-05, + "loss": 0.2435, + "step": 15619, + "teacher_loss": 0.21077196300029755 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.3553329110145569, + "learning_rate": 1.875489378983063e-05, + "loss": 0.2457, + "step": 15620, + "teacher_loss": 0.2334851622581482 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.618273138999939, + "learning_rate": 1.8752694810139903e-05, + "loss": 0.2574, + "step": 15621, + "teacher_loss": 0.21730799973011017 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.19615758955478668, + "learning_rate": 1.8750495744411137e-05, + "loss": 0.1547, + "step": 15622, + "teacher_loss": 0.15014046430587769 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.29901596903800964, + "learning_rate": 1.8748296592694744e-05, + "loss": 0.3083, + "step": 15623, + "teacher_loss": 0.3092902898788452 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.34863829612731934, + "learning_rate": 1.874609735504115e-05, + "loss": 0.2112, + "step": 15624, + "teacher_loss": 0.19590693712234497 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.2578679323196411, + "learning_rate": 1.8743898031500772e-05, + "loss": 0.4657, + "step": 15625, + "teacher_loss": 0.48884499073028564 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.6031060218811035, + "learning_rate": 1.874169862212404e-05, + "loss": 0.219, + "step": 15626, + "teacher_loss": 0.17628945410251617 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.39561349153518677, + "learning_rate": 1.8739499126961382e-05, + "loss": 0.4644, + "step": 15627, + "teacher_loss": 0.4720189571380615 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 1.214064598083496, + "learning_rate": 1.8737299546063213e-05, + "loss": 0.4031, + "step": 15628, + "teacher_loss": 0.3129780888557434 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.606117844581604, + "learning_rate": 1.8735099879479974e-05, + "loss": 0.2636, + "step": 15629, + "teacher_loss": 0.22556297481060028 + }, + { + "compression_loss": 0.0, + "epoch": 2.82, + "label_loss": 0.5360339879989624, + "learning_rate": 1.8732900127262094e-05, + "loss": 0.3415, + "step": 15630, + "teacher_loss": 0.31982964277267456 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.33991146087646484, + "learning_rate": 1.8730700289460005e-05, + "loss": 0.24, + "step": 15631, + "teacher_loss": 0.2288684844970703 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.26857253909111023, + "learning_rate": 1.8728500366124142e-05, + "loss": 0.2008, + "step": 15632, + "teacher_loss": 0.19331955909729004 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.504621148109436, + "learning_rate": 1.8726300357304942e-05, + "loss": 0.2489, + "step": 15633, + "teacher_loss": 0.2204873263835907 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.5409475564956665, + "learning_rate": 1.8724100263052854e-05, + "loss": 0.273, + "step": 15634, + "teacher_loss": 0.2432001382112503 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.3308134973049164, + "learning_rate": 1.8721900083418306e-05, + "loss": 0.1748, + "step": 15635, + "teacher_loss": 0.15751197934150696 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.41927462816238403, + "learning_rate": 1.871969981845175e-05, + "loss": 0.3609, + "step": 15636, + "teacher_loss": 0.35441386699676514 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.7724589109420776, + "learning_rate": 1.8717499468203627e-05, + "loss": 0.3027, + "step": 15637, + "teacher_loss": 0.2504867911338806 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.4513605833053589, + "learning_rate": 1.871529903272439e-05, + "loss": 0.23, + "step": 15638, + "teacher_loss": 0.20538701117038727 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.42484816908836365, + "learning_rate": 1.8713098512064485e-05, + "loss": 0.3451, + "step": 15639, + "teacher_loss": 0.33622509241104126 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.5539860129356384, + "learning_rate": 1.871089790627436e-05, + "loss": 0.1974, + "step": 15640, + "teacher_loss": 0.15773595869541168 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.43911492824554443, + "learning_rate": 1.8708697215404478e-05, + "loss": 0.2656, + "step": 15641, + "teacher_loss": 0.24634799361228943 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.9357492327690125, + "learning_rate": 1.8706496439505285e-05, + "loss": 0.7289, + "step": 15642, + "teacher_loss": 0.7059305310249329 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.3149608373641968, + "learning_rate": 1.870429557862724e-05, + "loss": 0.245, + "step": 15643, + "teacher_loss": 0.23724766075611115 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.5427775382995605, + "learning_rate": 1.8702094632820804e-05, + "loss": 0.249, + "step": 15644, + "teacher_loss": 0.2163078486919403 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 1.2592142820358276, + "learning_rate": 1.8699893602136438e-05, + "loss": 0.4716, + "step": 15645, + "teacher_loss": 0.38407737016677856 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.5874372720718384, + "learning_rate": 1.8697692486624606e-05, + "loss": 0.2909, + "step": 15646, + "teacher_loss": 0.2579842805862427 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.40630921721458435, + "learning_rate": 1.869549128633577e-05, + "loss": 0.3103, + "step": 15647, + "teacher_loss": 0.2996810972690582 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.32114318013191223, + "learning_rate": 1.8693290001320398e-05, + "loss": 0.2274, + "step": 15648, + "teacher_loss": 0.21702814102172852 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.49709194898605347, + "learning_rate": 1.8691088631628964e-05, + "loss": 0.2864, + "step": 15649, + "teacher_loss": 0.2629718780517578 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.4703463315963745, + "learning_rate": 1.8688887177311925e-05, + "loss": 0.2049, + "step": 15650, + "teacher_loss": 0.17542661726474762 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.5739625692367554, + "learning_rate": 1.868668563841977e-05, + "loss": 0.3398, + "step": 15651, + "teacher_loss": 0.31378665566444397 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.26552414894104004, + "learning_rate": 1.8684484015002966e-05, + "loss": 0.1714, + "step": 15652, + "teacher_loss": 0.16098767518997192 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.32081177830696106, + "learning_rate": 1.8682282307111988e-05, + "loss": 0.2379, + "step": 15653, + "teacher_loss": 0.22868916392326355 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.23396648466587067, + "learning_rate": 1.868008051479732e-05, + "loss": 0.2617, + "step": 15654, + "teacher_loss": 0.26478099822998047 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.5613775253295898, + "learning_rate": 1.8677878638109434e-05, + "loss": 0.6418, + "step": 15655, + "teacher_loss": 0.6507104635238647 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.5904250741004944, + "learning_rate": 1.867567667709882e-05, + "loss": 0.2996, + "step": 15656, + "teacher_loss": 0.2672576308250427 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.5111725330352783, + "learning_rate": 1.8673474631815962e-05, + "loss": 0.2568, + "step": 15657, + "teacher_loss": 0.22850503027439117 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.9034008979797363, + "learning_rate": 1.8671272502311343e-05, + "loss": 0.3059, + "step": 15658, + "teacher_loss": 0.23954731225967407 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.33696508407592773, + "learning_rate": 1.8669070288635454e-05, + "loss": 0.2637, + "step": 15659, + "teacher_loss": 0.25555580854415894 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.35835087299346924, + "learning_rate": 1.866686799083878e-05, + "loss": 0.1645, + "step": 15660, + "teacher_loss": 0.14300982654094696 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.30030542612075806, + "learning_rate": 1.866466560897182e-05, + "loss": 0.1432, + "step": 15661, + "teacher_loss": 0.12575635313987732 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.4981169104576111, + "learning_rate": 1.8662463143085063e-05, + "loss": 0.3893, + "step": 15662, + "teacher_loss": 0.3771844804286957 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.4201228618621826, + "learning_rate": 1.8660260593229007e-05, + "loss": 0.2382, + "step": 15663, + "teacher_loss": 0.2179337441921234 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.6173253655433655, + "learning_rate": 1.8658057959454154e-05, + "loss": 0.2886, + "step": 15664, + "teacher_loss": 0.25206518173217773 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.2200109362602234, + "learning_rate": 1.8655855241810995e-05, + "loss": 0.1687, + "step": 15665, + "teacher_loss": 0.16304980218410492 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.2391153872013092, + "learning_rate": 1.8653652440350036e-05, + "loss": 0.222, + "step": 15666, + "teacher_loss": 0.2200511395931244 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.4334554970264435, + "learning_rate": 1.8651449555121785e-05, + "loss": 0.2428, + "step": 15667, + "teacher_loss": 0.22165828943252563 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.5943639278411865, + "learning_rate": 1.8649246586176737e-05, + "loss": 0.4046, + "step": 15668, + "teacher_loss": 0.3835371136665344 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.4538796842098236, + "learning_rate": 1.8647043533565407e-05, + "loss": 0.1859, + "step": 15669, + "teacher_loss": 0.15614989399909973 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.5664341449737549, + "learning_rate": 1.8644840397338305e-05, + "loss": 0.2139, + "step": 15670, + "teacher_loss": 0.17472794651985168 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.3884039521217346, + "learning_rate": 1.8642637177545937e-05, + "loss": 0.2022, + "step": 15671, + "teacher_loss": 0.1815069019794464 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.9344276189804077, + "learning_rate": 1.8640433874238828e-05, + "loss": 0.3869, + "step": 15672, + "teacher_loss": 0.3260282576084137 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.3322821259498596, + "learning_rate": 1.8638230487467477e-05, + "loss": 0.3727, + "step": 15673, + "teacher_loss": 0.37714940309524536 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.5182181000709534, + "learning_rate": 1.8636027017282413e-05, + "loss": 0.3984, + "step": 15674, + "teacher_loss": 0.38508763909339905 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.5167944431304932, + "learning_rate": 1.863382346373415e-05, + "loss": 0.3163, + "step": 15675, + "teacher_loss": 0.29404598474502563 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.32424888014793396, + "learning_rate": 1.863161982687321e-05, + "loss": 0.2529, + "step": 15676, + "teacher_loss": 0.24494808912277222 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.2875154912471771, + "learning_rate": 1.8629416106750114e-05, + "loss": 0.2082, + "step": 15677, + "teacher_loss": 0.19933509826660156 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.25766521692276, + "learning_rate": 1.8627212303415387e-05, + "loss": 0.1554, + "step": 15678, + "teacher_loss": 0.144064798951149 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.4190814793109894, + "learning_rate": 1.862500841691956e-05, + "loss": 0.2516, + "step": 15679, + "teacher_loss": 0.2330310195684433 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.7138117551803589, + "learning_rate": 1.8622804447313158e-05, + "loss": 0.3107, + "step": 15680, + "teacher_loss": 0.2659236192703247 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.2788236439228058, + "learning_rate": 1.862060039464671e-05, + "loss": 0.2288, + "step": 15681, + "teacher_loss": 0.2232353836297989 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.5316205024719238, + "learning_rate": 1.8618396258970756e-05, + "loss": 0.352, + "step": 15682, + "teacher_loss": 0.33199450373649597 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.26798853278160095, + "learning_rate": 1.861619204033582e-05, + "loss": 0.2999, + "step": 15683, + "teacher_loss": 0.30348458886146545 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.6284560561180115, + "learning_rate": 1.861398773879244e-05, + "loss": 0.296, + "step": 15684, + "teacher_loss": 0.2590172290802002 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.3720332086086273, + "learning_rate": 1.8611783354391155e-05, + "loss": 0.1858, + "step": 15685, + "teacher_loss": 0.16506797075271606 + }, + { + "compression_loss": 0.0, + "epoch": 2.83, + "label_loss": 0.6247307062149048, + "learning_rate": 1.860957888718251e-05, + "loss": 0.3464, + "step": 15686, + "teacher_loss": 0.315435528755188 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.5692695379257202, + "learning_rate": 1.8607374337217047e-05, + "loss": 0.2624, + "step": 15687, + "teacher_loss": 0.2282578945159912 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.6733032464981079, + "learning_rate": 1.86051697045453e-05, + "loss": 0.2681, + "step": 15688, + "teacher_loss": 0.22306840121746063 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 1.0777983665466309, + "learning_rate": 1.860296498921782e-05, + "loss": 0.3977, + "step": 15689, + "teacher_loss": 0.322085976600647 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.7687364220619202, + "learning_rate": 1.8600760191285156e-05, + "loss": 0.3301, + "step": 15690, + "teacher_loss": 0.2813347578048706 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.4539843201637268, + "learning_rate": 1.859855531079786e-05, + "loss": 0.3156, + "step": 15691, + "teacher_loss": 0.30026572942733765 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.5300315022468567, + "learning_rate": 1.859635034780648e-05, + "loss": 0.2781, + "step": 15692, + "teacher_loss": 0.2501027584075928 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.36437398195266724, + "learning_rate": 1.8594145302361565e-05, + "loss": 0.2249, + "step": 15693, + "teacher_loss": 0.20937681198120117 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.704693615436554, + "learning_rate": 1.859194017451368e-05, + "loss": 0.3094, + "step": 15694, + "teacher_loss": 0.26544734835624695 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.5063294172286987, + "learning_rate": 1.858973496431337e-05, + "loss": 0.2748, + "step": 15695, + "teacher_loss": 0.24907800555229187 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.2795817255973816, + "learning_rate": 1.8587529671811196e-05, + "loss": 0.1987, + "step": 15696, + "teacher_loss": 0.1897178739309311 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.23716753721237183, + "learning_rate": 1.8585324297057733e-05, + "loss": 0.3051, + "step": 15697, + "teacher_loss": 0.31267133355140686 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.2384091019630432, + "learning_rate": 1.8583118840103527e-05, + "loss": 0.1933, + "step": 15698, + "teacher_loss": 0.18823449313640594 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 1.171275019645691, + "learning_rate": 1.858091330099915e-05, + "loss": 0.4972, + "step": 15699, + "teacher_loss": 0.4222884774208069 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.7586273550987244, + "learning_rate": 1.8578707679795167e-05, + "loss": 0.3173, + "step": 15700, + "teacher_loss": 0.2682155966758728 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.9817002415657043, + "learning_rate": 1.8576501976542147e-05, + "loss": 0.2851, + "step": 15701, + "teacher_loss": 0.20771317183971405 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.22894296050071716, + "learning_rate": 1.8574296191290656e-05, + "loss": 0.2268, + "step": 15702, + "teacher_loss": 0.22658196091651917 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.7663373351097107, + "learning_rate": 1.8572090324091273e-05, + "loss": 0.328, + "step": 15703, + "teacher_loss": 0.2793191075325012 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.7877432107925415, + "learning_rate": 1.856988437499457e-05, + "loss": 0.3482, + "step": 15704, + "teacher_loss": 0.29936063289642334 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.527571976184845, + "learning_rate": 1.856767834405112e-05, + "loss": 0.3368, + "step": 15705, + "teacher_loss": 0.31564217805862427 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.5882084369659424, + "learning_rate": 1.85654722313115e-05, + "loss": 0.2295, + "step": 15706, + "teacher_loss": 0.18961066007614136 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.8490410447120667, + "learning_rate": 1.8563266036826295e-05, + "loss": 0.3169, + "step": 15707, + "teacher_loss": 0.2577856183052063 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.2687148153781891, + "learning_rate": 1.8561059760646082e-05, + "loss": 0.1715, + "step": 15708, + "teacher_loss": 0.16073903441429138 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.3655679225921631, + "learning_rate": 1.8558853402821444e-05, + "loss": 0.2495, + "step": 15709, + "teacher_loss": 0.23658691346645355 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.26435890793800354, + "learning_rate": 1.8556646963402965e-05, + "loss": 0.1666, + "step": 15710, + "teacher_loss": 0.15578149259090424 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.9528557062149048, + "learning_rate": 1.8554440442441242e-05, + "loss": 0.3636, + "step": 15711, + "teacher_loss": 0.29816627502441406 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.5803512334823608, + "learning_rate": 1.8552233839986848e-05, + "loss": 0.2559, + "step": 15712, + "teacher_loss": 0.21987971663475037 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.2601315379142761, + "learning_rate": 1.8550027156090385e-05, + "loss": 0.1929, + "step": 15713, + "teacher_loss": 0.18540364503860474 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.1583322286605835, + "learning_rate": 1.854782039080245e-05, + "loss": 0.1959, + "step": 15714, + "teacher_loss": 0.20003946125507355 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.5300459265708923, + "learning_rate": 1.8545613544173623e-05, + "loss": 0.2989, + "step": 15715, + "teacher_loss": 0.2732434868812561 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.49268850684165955, + "learning_rate": 1.854340661625451e-05, + "loss": 0.2754, + "step": 15716, + "teacher_loss": 0.2512151598930359 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.3916330635547638, + "learning_rate": 1.8541199607095705e-05, + "loss": 0.2226, + "step": 15717, + "teacher_loss": 0.20382243394851685 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.34222325682640076, + "learning_rate": 1.853899251674781e-05, + "loss": 0.1875, + "step": 15718, + "teacher_loss": 0.1703435331583023 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.6559106707572937, + "learning_rate": 1.8536785345261428e-05, + "loss": 0.1871, + "step": 15719, + "teacher_loss": 0.13496747612953186 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.807537853717804, + "learning_rate": 1.8534578092687163e-05, + "loss": 0.7528, + "step": 15720, + "teacher_loss": 0.7467369437217712 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.222446471452713, + "learning_rate": 1.8532370759075616e-05, + "loss": 0.2122, + "step": 15721, + "teacher_loss": 0.21110032498836517 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.44959917664527893, + "learning_rate": 1.8530163344477406e-05, + "loss": 0.2349, + "step": 15722, + "teacher_loss": 0.21101459860801697 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.7312188148498535, + "learning_rate": 1.8527955848943125e-05, + "loss": 0.6522, + "step": 15723, + "teacher_loss": 0.6433976292610168 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.3048573136329651, + "learning_rate": 1.8525748272523397e-05, + "loss": 0.2784, + "step": 15724, + "teacher_loss": 0.27547207474708557 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.419810026884079, + "learning_rate": 1.852354061526884e-05, + "loss": 0.2546, + "step": 15725, + "teacher_loss": 0.23624543845653534 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.4434985816478729, + "learning_rate": 1.8521332877230047e-05, + "loss": 0.3345, + "step": 15726, + "teacher_loss": 0.3223349452018738 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.4418366551399231, + "learning_rate": 1.851912505845766e-05, + "loss": 0.2966, + "step": 15727, + "teacher_loss": 0.28050196170806885 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.846725344657898, + "learning_rate": 1.851691715900228e-05, + "loss": 0.3162, + "step": 15728, + "teacher_loss": 0.2572518587112427 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.5746957063674927, + "learning_rate": 1.8514709178914533e-05, + "loss": 0.3229, + "step": 15729, + "teacher_loss": 0.2949431538581848 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.4985443949699402, + "learning_rate": 1.8512501118245046e-05, + "loss": 0.4783, + "step": 15730, + "teacher_loss": 0.4760809540748596 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.40349799394607544, + "learning_rate": 1.8510292977044434e-05, + "loss": 0.241, + "step": 15731, + "teacher_loss": 0.22298294305801392 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.4171718955039978, + "learning_rate": 1.8508084755363335e-05, + "loss": 0.301, + "step": 15732, + "teacher_loss": 0.2881176471710205 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.4362102746963501, + "learning_rate": 1.8505876453252368e-05, + "loss": 0.324, + "step": 15733, + "teacher_loss": 0.311529278755188 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.10716117918491364, + "learning_rate": 1.8503668070762165e-05, + "loss": 0.2117, + "step": 15734, + "teacher_loss": 0.2233637124300003 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.5705432891845703, + "learning_rate": 1.850145960794336e-05, + "loss": 0.1998, + "step": 15735, + "teacher_loss": 0.1586064100265503 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.3731476962566376, + "learning_rate": 1.8499251064846576e-05, + "loss": 0.267, + "step": 15736, + "teacher_loss": 0.2551867365837097 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.4048183560371399, + "learning_rate": 1.8497042441522464e-05, + "loss": 0.3202, + "step": 15737, + "teacher_loss": 0.31074345111846924 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.5419814586639404, + "learning_rate": 1.8494833738021655e-05, + "loss": 0.3041, + "step": 15738, + "teacher_loss": 0.2776755094528198 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.34955430030822754, + "learning_rate": 1.8492624954394782e-05, + "loss": 0.1913, + "step": 15739, + "teacher_loss": 0.1737421751022339 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.38567274808883667, + "learning_rate": 1.849041609069249e-05, + "loss": 0.2998, + "step": 15740, + "teacher_loss": 0.29028838872909546 + }, + { + "compression_loss": 0.0, + "epoch": 2.84, + "label_loss": 0.4437521696090698, + "learning_rate": 1.8488207146965423e-05, + "loss": 0.2128, + "step": 15741, + "teacher_loss": 0.18712544441223145 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.3764735162258148, + "learning_rate": 1.8485998123264222e-05, + "loss": 0.413, + "step": 15742, + "teacher_loss": 0.41708898544311523 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.2732475996017456, + "learning_rate": 1.8483789019639537e-05, + "loss": 0.2142, + "step": 15743, + "teacher_loss": 0.20763376355171204 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.7141246795654297, + "learning_rate": 1.8481579836142016e-05, + "loss": 0.3103, + "step": 15744, + "teacher_loss": 0.26540425419807434 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.5449739694595337, + "learning_rate": 1.847937057282231e-05, + "loss": 0.3066, + "step": 15745, + "teacher_loss": 0.28010839223861694 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.6905507445335388, + "learning_rate": 1.8477161229731066e-05, + "loss": 0.2589, + "step": 15746, + "teacher_loss": 0.21091032028198242 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 1.1155308485031128, + "learning_rate": 1.847495180691894e-05, + "loss": 0.4076, + "step": 15747, + "teacher_loss": 0.3289552927017212 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.22949522733688354, + "learning_rate": 1.8472742304436586e-05, + "loss": 0.214, + "step": 15748, + "teacher_loss": 0.21227796375751495 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.5418639183044434, + "learning_rate": 1.8470532722334664e-05, + "loss": 0.22, + "step": 15749, + "teacher_loss": 0.1842201054096222 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.6393435597419739, + "learning_rate": 1.8468323060663832e-05, + "loss": 0.2975, + "step": 15750, + "teacher_loss": 0.2595379948616028 + }, + { + "epoch": 2.85, + "eval_exact_match": 79.75402081362347, + "eval_f1": 87.38767630119374, + "step": 15750 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.2720993757247925, + "learning_rate": 1.8466113319474747e-05, + "loss": 0.1668, + "step": 15751, + "teacher_loss": 0.1550801694393158 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.6986119747161865, + "learning_rate": 1.8463903498818088e-05, + "loss": 0.3543, + "step": 15752, + "teacher_loss": 0.31603682041168213 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.425250381231308, + "learning_rate": 1.846169359874449e-05, + "loss": 0.2327, + "step": 15753, + "teacher_loss": 0.21135637164115906 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.6598292589187622, + "learning_rate": 1.8459483619304648e-05, + "loss": 0.2813, + "step": 15754, + "teacher_loss": 0.23922353982925415 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.22085297107696533, + "learning_rate": 1.8457273560549216e-05, + "loss": 0.1534, + "step": 15755, + "teacher_loss": 0.14585380256175995 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.5688523054122925, + "learning_rate": 1.8455063422528865e-05, + "loss": 0.2156, + "step": 15756, + "teacher_loss": 0.17639166116714478 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.8762666583061218, + "learning_rate": 1.8452853205294273e-05, + "loss": 0.391, + "step": 15757, + "teacher_loss": 0.33703452348709106 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.297713041305542, + "learning_rate": 1.8450642908896104e-05, + "loss": 0.2478, + "step": 15758, + "teacher_loss": 0.24224433302879333 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.5520319938659668, + "learning_rate": 1.844843253338504e-05, + "loss": 0.2644, + "step": 15759, + "teacher_loss": 0.23243862390518188 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.40462255477905273, + "learning_rate": 1.844622207881176e-05, + "loss": 0.2567, + "step": 15760, + "teacher_loss": 0.24028655886650085 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.18966549634933472, + "learning_rate": 1.8444011545226934e-05, + "loss": 0.1812, + "step": 15761, + "teacher_loss": 0.18027850985527039 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.23252812027931213, + "learning_rate": 1.8441800932681257e-05, + "loss": 0.285, + "step": 15762, + "teacher_loss": 0.29081469774246216 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.30862972140312195, + "learning_rate": 1.8439590241225396e-05, + "loss": 0.1715, + "step": 15763, + "teacher_loss": 0.15621252357959747 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.29161760210990906, + "learning_rate": 1.843737947091005e-05, + "loss": 0.303, + "step": 15764, + "teacher_loss": 0.3042460083961487 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.8093780279159546, + "learning_rate": 1.843516862178589e-05, + "loss": 0.286, + "step": 15765, + "teacher_loss": 0.22782929241657257 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.2768884301185608, + "learning_rate": 1.843295769390362e-05, + "loss": 0.1797, + "step": 15766, + "teacher_loss": 0.1689174473285675 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.27470147609710693, + "learning_rate": 1.8430746687313923e-05, + "loss": 0.2324, + "step": 15767, + "teacher_loss": 0.2276635766029358 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.5782203078269958, + "learning_rate": 1.8428535602067486e-05, + "loss": 0.38, + "step": 15768, + "teacher_loss": 0.35796087980270386 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.9145200252532959, + "learning_rate": 1.8426324438215005e-05, + "loss": 0.4915, + "step": 15769, + "teacher_loss": 0.44447994232177734 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.4878411889076233, + "learning_rate": 1.842411319580718e-05, + "loss": 0.2783, + "step": 15770, + "teacher_loss": 0.25507229566574097 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.5145754218101501, + "learning_rate": 1.842190187489471e-05, + "loss": 0.2432, + "step": 15771, + "teacher_loss": 0.212994784116745 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.46705102920532227, + "learning_rate": 1.8419690475528286e-05, + "loss": 0.2684, + "step": 15772, + "teacher_loss": 0.24628250300884247 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.6455156207084656, + "learning_rate": 1.841747899775861e-05, + "loss": 0.3441, + "step": 15773, + "teacher_loss": 0.3105820417404175 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.7152342200279236, + "learning_rate": 1.8415267441636388e-05, + "loss": 0.4826, + "step": 15774, + "teacher_loss": 0.4567483067512512 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.7802227735519409, + "learning_rate": 1.8413055807212324e-05, + "loss": 0.2188, + "step": 15775, + "teacher_loss": 0.1564652919769287 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.19174207746982574, + "learning_rate": 1.8410844094537124e-05, + "loss": 0.1793, + "step": 15776, + "teacher_loss": 0.1778654158115387 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.15918049216270447, + "learning_rate": 1.8408632303661494e-05, + "loss": 0.1691, + "step": 15777, + "teacher_loss": 0.17019785940647125 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.29411840438842773, + "learning_rate": 1.8406420434636144e-05, + "loss": 0.1796, + "step": 15778, + "teacher_loss": 0.16691213846206665 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.41873395442962646, + "learning_rate": 1.8404208487511786e-05, + "loss": 0.1891, + "step": 15779, + "teacher_loss": 0.1635519564151764 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.3061790466308594, + "learning_rate": 1.8401996462339138e-05, + "loss": 0.2715, + "step": 15780, + "teacher_loss": 0.26762598752975464 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.38915109634399414, + "learning_rate": 1.8399784359168904e-05, + "loss": 0.2281, + "step": 15781, + "teacher_loss": 0.21018287539482117 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.3290069103240967, + "learning_rate": 1.839757217805182e-05, + "loss": 0.1862, + "step": 15782, + "teacher_loss": 0.1703346222639084 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.3589189350605011, + "learning_rate": 1.839535991903858e-05, + "loss": 0.246, + "step": 15783, + "teacher_loss": 0.233504980802536 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.5270801782608032, + "learning_rate": 1.839314758217992e-05, + "loss": 0.2206, + "step": 15784, + "teacher_loss": 0.1865430474281311 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.33380383253097534, + "learning_rate": 1.839093516752656e-05, + "loss": 0.201, + "step": 15785, + "teacher_loss": 0.18623086810112 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.5628454685211182, + "learning_rate": 1.838872267512922e-05, + "loss": 0.3038, + "step": 15786, + "teacher_loss": 0.27499938011169434 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.5338721871376038, + "learning_rate": 1.8386510105038636e-05, + "loss": 0.1982, + "step": 15787, + "teacher_loss": 0.16092006862163544 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.6219146847724915, + "learning_rate": 1.8384297457305524e-05, + "loss": 0.337, + "step": 15788, + "teacher_loss": 0.3053082227706909 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.4425116777420044, + "learning_rate": 1.838208473198062e-05, + "loss": 0.2227, + "step": 15789, + "teacher_loss": 0.198236882686615 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.7788664102554321, + "learning_rate": 1.8379871929114652e-05, + "loss": 0.3461, + "step": 15790, + "teacher_loss": 0.29796069860458374 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.5995031595230103, + "learning_rate": 1.8377659048758347e-05, + "loss": 0.2554, + "step": 15791, + "teacher_loss": 0.21720127761363983 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.24850031733512878, + "learning_rate": 1.8375446090962458e-05, + "loss": 0.2696, + "step": 15792, + "teacher_loss": 0.27193427085876465 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.3667680621147156, + "learning_rate": 1.8373233055777705e-05, + "loss": 0.2201, + "step": 15793, + "teacher_loss": 0.20380929112434387 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.5167076587677002, + "learning_rate": 1.837101994325483e-05, + "loss": 0.2164, + "step": 15794, + "teacher_loss": 0.18301963806152344 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.7842326164245605, + "learning_rate": 1.8368806753444578e-05, + "loss": 0.3552, + "step": 15795, + "teacher_loss": 0.3075253963470459 + }, + { + "compression_loss": 0.0, + "epoch": 2.85, + "label_loss": 0.610200822353363, + "learning_rate": 1.8366593486397688e-05, + "loss": 0.2828, + "step": 15796, + "teacher_loss": 0.24646055698394775 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.1728951632976532, + "learning_rate": 1.8364380142164904e-05, + "loss": 0.1396, + "step": 15797, + "teacher_loss": 0.13586542010307312 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.816399335861206, + "learning_rate": 1.8362166720796966e-05, + "loss": 0.2978, + "step": 15798, + "teacher_loss": 0.24016831815242767 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.2552238702774048, + "learning_rate": 1.8359953222344626e-05, + "loss": 0.1907, + "step": 15799, + "teacher_loss": 0.1835801601409912 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.7772147059440613, + "learning_rate": 1.835773964685863e-05, + "loss": 0.3823, + "step": 15800, + "teacher_loss": 0.3384694457054138 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 1.3603225946426392, + "learning_rate": 1.8355525994389737e-05, + "loss": 0.3377, + "step": 15801, + "teacher_loss": 0.22407333552837372 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.3928898572921753, + "learning_rate": 1.835331226498869e-05, + "loss": 0.2548, + "step": 15802, + "teacher_loss": 0.2394246608018875 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.33300483226776123, + "learning_rate": 1.8351098458706246e-05, + "loss": 0.2359, + "step": 15803, + "teacher_loss": 0.22513073682785034 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.5865911245346069, + "learning_rate": 1.834888457559316e-05, + "loss": 0.2456, + "step": 15804, + "teacher_loss": 0.20774272084236145 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.48009568452835083, + "learning_rate": 1.8346670615700195e-05, + "loss": 0.3015, + "step": 15805, + "teacher_loss": 0.28160303831100464 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 1.0230073928833008, + "learning_rate": 1.8344456579078103e-05, + "loss": 0.3164, + "step": 15806, + "teacher_loss": 0.2378537356853485 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.5411422252655029, + "learning_rate": 1.8342242465777655e-05, + "loss": 0.1899, + "step": 15807, + "teacher_loss": 0.15092778205871582 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.47372472286224365, + "learning_rate": 1.8340028275849602e-05, + "loss": 0.2638, + "step": 15808, + "teacher_loss": 0.24047425389289856 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.37042713165283203, + "learning_rate": 1.8337814009344716e-05, + "loss": 0.3391, + "step": 15809, + "teacher_loss": 0.33559519052505493 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.3425653874874115, + "learning_rate": 1.8335599666313764e-05, + "loss": 0.1931, + "step": 15810, + "teacher_loss": 0.17654749751091003 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.21195080876350403, + "learning_rate": 1.8333385246807507e-05, + "loss": 0.1911, + "step": 15811, + "teacher_loss": 0.18874311447143555 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.355308473110199, + "learning_rate": 1.833117075087673e-05, + "loss": 0.2373, + "step": 15812, + "teacher_loss": 0.2242126166820526 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.23206931352615356, + "learning_rate": 1.8328956178572187e-05, + "loss": 0.2131, + "step": 15813, + "teacher_loss": 0.21103033423423767 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 1.2173418998718262, + "learning_rate": 1.8326741529944663e-05, + "loss": 0.4846, + "step": 15814, + "teacher_loss": 0.40314981341362 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.4791605472564697, + "learning_rate": 1.832452680504493e-05, + "loss": 0.3586, + "step": 15815, + "teacher_loss": 0.34525907039642334 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.943343460559845, + "learning_rate": 1.8322312003923757e-05, + "loss": 0.4278, + "step": 15816, + "teacher_loss": 0.3705439567565918 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.9232819080352783, + "learning_rate": 1.832009712663194e-05, + "loss": 0.298, + "step": 15817, + "teacher_loss": 0.2284882366657257 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.41511350870132446, + "learning_rate": 1.8317882173220244e-05, + "loss": 0.2363, + "step": 15818, + "teacher_loss": 0.216459259390831 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.22387650609016418, + "learning_rate": 1.831566714373946e-05, + "loss": 0.2385, + "step": 15819, + "teacher_loss": 0.24014881253242493 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.389695405960083, + "learning_rate": 1.8313452038240375e-05, + "loss": 0.4148, + "step": 15820, + "teacher_loss": 0.4176251292228699 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.6074966788291931, + "learning_rate": 1.831123685677376e-05, + "loss": 0.4295, + "step": 15821, + "teacher_loss": 0.40977340936660767 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 1.0168144702911377, + "learning_rate": 1.8309021599390415e-05, + "loss": 0.4211, + "step": 15822, + "teacher_loss": 0.3549244999885559 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.16227078437805176, + "learning_rate": 1.830680626614113e-05, + "loss": 0.2178, + "step": 15823, + "teacher_loss": 0.22392481565475464 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.5151613354682922, + "learning_rate": 1.830459085707668e-05, + "loss": 0.2983, + "step": 15824, + "teacher_loss": 0.27420979738235474 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.33182772994041443, + "learning_rate": 1.8302375372247878e-05, + "loss": 0.2378, + "step": 15825, + "teacher_loss": 0.22733959555625916 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.4617161452770233, + "learning_rate": 1.830015981170551e-05, + "loss": 0.3127, + "step": 15826, + "teacher_loss": 0.29609590768814087 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.2880086898803711, + "learning_rate": 1.829794417550037e-05, + "loss": 0.2567, + "step": 15827, + "teacher_loss": 0.2532292604446411 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.4655439257621765, + "learning_rate": 1.829572846368326e-05, + "loss": 0.4208, + "step": 15828, + "teacher_loss": 0.4157954454421997 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.2411046028137207, + "learning_rate": 1.8293512676304973e-05, + "loss": 0.2337, + "step": 15829, + "teacher_loss": 0.2328571379184723 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.32122257351875305, + "learning_rate": 1.8291296813416315e-05, + "loss": 0.2781, + "step": 15830, + "teacher_loss": 0.2732689380645752 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.7128485441207886, + "learning_rate": 1.8289080875068094e-05, + "loss": 0.2558, + "step": 15831, + "teacher_loss": 0.20498883724212646 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.4479658603668213, + "learning_rate": 1.8286864861311105e-05, + "loss": 0.3956, + "step": 15832, + "teacher_loss": 0.38976895809173584 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.5889577269554138, + "learning_rate": 1.8284648772196162e-05, + "loss": 0.4092, + "step": 15833, + "teacher_loss": 0.3891795873641968 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.3624516725540161, + "learning_rate": 1.828243260777407e-05, + "loss": 0.3621, + "step": 15834, + "teacher_loss": 0.3620643615722656 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.4362243115901947, + "learning_rate": 1.828021636809564e-05, + "loss": 0.3267, + "step": 15835, + "teacher_loss": 0.31456223130226135 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.3359057307243347, + "learning_rate": 1.8278000053211677e-05, + "loss": 0.2108, + "step": 15836, + "teacher_loss": 0.19689424335956573 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.41603606939315796, + "learning_rate": 1.8275783663173013e-05, + "loss": 0.336, + "step": 15837, + "teacher_loss": 0.3270907998085022 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.7121304273605347, + "learning_rate": 1.827356719803044e-05, + "loss": 0.3189, + "step": 15838, + "teacher_loss": 0.2752155363559723 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.37949061393737793, + "learning_rate": 1.8271350657834792e-05, + "loss": 0.2746, + "step": 15839, + "teacher_loss": 0.2629939317703247 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.5888032913208008, + "learning_rate": 1.826913404263688e-05, + "loss": 0.4566, + "step": 15840, + "teacher_loss": 0.441903293132782 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.6193324327468872, + "learning_rate": 1.826691735248752e-05, + "loss": 0.2668, + "step": 15841, + "teacher_loss": 0.22764629125595093 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.2719542980194092, + "learning_rate": 1.8264700587437547e-05, + "loss": 0.1629, + "step": 15842, + "teacher_loss": 0.15083414316177368 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.5249314308166504, + "learning_rate": 1.8262483747537777e-05, + "loss": 0.2956, + "step": 15843, + "teacher_loss": 0.27013659477233887 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.6424548625946045, + "learning_rate": 1.8260266832839032e-05, + "loss": 0.339, + "step": 15844, + "teacher_loss": 0.3052673935890198 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.3115832805633545, + "learning_rate": 1.825804984339215e-05, + "loss": 0.3466, + "step": 15845, + "teacher_loss": 0.3504369556903839 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.6472749710083008, + "learning_rate": 1.8255832779247946e-05, + "loss": 0.2736, + "step": 15846, + "teacher_loss": 0.23211097717285156 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.8693272471427917, + "learning_rate": 1.8253615640457263e-05, + "loss": 0.2758, + "step": 15847, + "teacher_loss": 0.20985053479671478 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 1.0800511837005615, + "learning_rate": 1.8251398427070926e-05, + "loss": 0.3452, + "step": 15848, + "teacher_loss": 0.2635941207408905 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.8767670392990112, + "learning_rate": 1.824918113913977e-05, + "loss": 0.34, + "step": 15849, + "teacher_loss": 0.28037768602371216 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.8337864279747009, + "learning_rate": 1.824696377671464e-05, + "loss": 0.3117, + "step": 15850, + "teacher_loss": 0.2536882162094116 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 1.2983940839767456, + "learning_rate": 1.8244746339846353e-05, + "loss": 0.5908, + "step": 15851, + "teacher_loss": 0.512146532535553 + }, + { + "compression_loss": 0.0, + "epoch": 2.86, + "label_loss": 0.376639187335968, + "learning_rate": 1.824252882858577e-05, + "loss": 0.2354, + "step": 15852, + "teacher_loss": 0.2196727991104126 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.15303528308868408, + "learning_rate": 1.824031124298372e-05, + "loss": 0.1917, + "step": 15853, + "teacher_loss": 0.1960466206073761 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.41173696517944336, + "learning_rate": 1.823809358309104e-05, + "loss": 0.3155, + "step": 15854, + "teacher_loss": 0.3048304319381714 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.28301042318344116, + "learning_rate": 1.8235875848958593e-05, + "loss": 0.1679, + "step": 15855, + "teacher_loss": 0.15508979558944702 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.34113240242004395, + "learning_rate": 1.8233658040637212e-05, + "loss": 0.3067, + "step": 15856, + "teacher_loss": 0.3028240203857422 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.7279713749885559, + "learning_rate": 1.8231440158177747e-05, + "loss": 0.2719, + "step": 15857, + "teacher_loss": 0.2212262749671936 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.9390513896942139, + "learning_rate": 1.8229222201631045e-05, + "loss": 0.3198, + "step": 15858, + "teacher_loss": 0.250988245010376 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.3111320734024048, + "learning_rate": 1.822700417104796e-05, + "loss": 0.1913, + "step": 15859, + "teacher_loss": 0.17796854674816132 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.3001253008842468, + "learning_rate": 1.822478606647935e-05, + "loss": 0.2889, + "step": 15860, + "teacher_loss": 0.2876701056957245 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.21206986904144287, + "learning_rate": 1.822256788797606e-05, + "loss": 0.2026, + "step": 15861, + "teacher_loss": 0.20155739784240723 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.3080791234970093, + "learning_rate": 1.822034963558895e-05, + "loss": 0.3215, + "step": 15862, + "teacher_loss": 0.32301846146583557 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.34815913438796997, + "learning_rate": 1.8218131309368876e-05, + "loss": 0.2234, + "step": 15863, + "teacher_loss": 0.20953653752803802 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.629808783531189, + "learning_rate": 1.8215912909366704e-05, + "loss": 0.3136, + "step": 15864, + "teacher_loss": 0.27846044301986694 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.33816325664520264, + "learning_rate": 1.821369443563329e-05, + "loss": 0.3147, + "step": 15865, + "teacher_loss": 0.3120826482772827 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.7259360551834106, + "learning_rate": 1.8211475888219492e-05, + "loss": 0.2269, + "step": 15866, + "teacher_loss": 0.17143885791301727 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.1640152633190155, + "learning_rate": 1.8209257267176185e-05, + "loss": 0.2365, + "step": 15867, + "teacher_loss": 0.24452602863311768 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.21413137018680573, + "learning_rate": 1.8207038572554232e-05, + "loss": 0.2584, + "step": 15868, + "teacher_loss": 0.26330065727233887 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.26071488857269287, + "learning_rate": 1.8204819804404497e-05, + "loss": 0.1597, + "step": 15869, + "teacher_loss": 0.1484990268945694 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.3123800456523895, + "learning_rate": 1.8202600962777856e-05, + "loss": 0.2677, + "step": 15870, + "teacher_loss": 0.2627692222595215 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.8645068407058716, + "learning_rate": 1.820038204772517e-05, + "loss": 0.4702, + "step": 15871, + "teacher_loss": 0.4263474941253662 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.6720243692398071, + "learning_rate": 1.8198163059297324e-05, + "loss": 0.336, + "step": 15872, + "teacher_loss": 0.29860880970954895 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.13621313869953156, + "learning_rate": 1.8195943997545187e-05, + "loss": 0.1866, + "step": 15873, + "teacher_loss": 0.19216597080230713 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.33425289392471313, + "learning_rate": 1.8193724862519634e-05, + "loss": 0.2293, + "step": 15874, + "teacher_loss": 0.2176315188407898 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.3725906014442444, + "learning_rate": 1.819150565427155e-05, + "loss": 0.2276, + "step": 15875, + "teacher_loss": 0.2114555984735489 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.1661052107810974, + "learning_rate": 1.8189286372851805e-05, + "loss": 0.1528, + "step": 15876, + "teacher_loss": 0.15127533674240112 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 1.0482935905456543, + "learning_rate": 1.818706701831129e-05, + "loss": 0.6401, + "step": 15877, + "teacher_loss": 0.5946966409683228 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.6520440578460693, + "learning_rate": 1.818484759070088e-05, + "loss": 0.3157, + "step": 15878, + "teacher_loss": 0.2783043086528778 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.7370253801345825, + "learning_rate": 1.8182628090071462e-05, + "loss": 0.3632, + "step": 15879, + "teacher_loss": 0.321697473526001 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.3806958496570587, + "learning_rate": 1.8180408516473933e-05, + "loss": 0.2371, + "step": 15880, + "teacher_loss": 0.22116263210773468 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.18035593628883362, + "learning_rate": 1.817818886995916e-05, + "loss": 0.1591, + "step": 15881, + "teacher_loss": 0.15674445033073425 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.3787170648574829, + "learning_rate": 1.8175969150578052e-05, + "loss": 0.3046, + "step": 15882, + "teacher_loss": 0.29638195037841797 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 1.3779008388519287, + "learning_rate": 1.817374935838149e-05, + "loss": 0.3741, + "step": 15883, + "teacher_loss": 0.26260918378829956 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.8832082748413086, + "learning_rate": 1.8171529493420375e-05, + "loss": 0.3496, + "step": 15884, + "teacher_loss": 0.29030150175094604 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.47625166177749634, + "learning_rate": 1.816930955574559e-05, + "loss": 0.2284, + "step": 15885, + "teacher_loss": 0.2009044885635376 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.19757282733917236, + "learning_rate": 1.8167089545408046e-05, + "loss": 0.1843, + "step": 15886, + "teacher_loss": 0.18281352519989014 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.4135333299636841, + "learning_rate": 1.8164869462458635e-05, + "loss": 0.3219, + "step": 15887, + "teacher_loss": 0.3117671012878418 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.22263991832733154, + "learning_rate": 1.816264930694825e-05, + "loss": 0.2269, + "step": 15888, + "teacher_loss": 0.22738727927207947 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.6423806548118591, + "learning_rate": 1.81604290789278e-05, + "loss": 0.4091, + "step": 15889, + "teacher_loss": 0.3831944763660431 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.31817013025283813, + "learning_rate": 1.815820877844819e-05, + "loss": 0.2287, + "step": 15890, + "teacher_loss": 0.2187390923500061 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.6518374681472778, + "learning_rate": 1.815598840556032e-05, + "loss": 0.3494, + "step": 15891, + "teacher_loss": 0.3157833516597748 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.24078303575515747, + "learning_rate": 1.8153767960315095e-05, + "loss": 0.2509, + "step": 15892, + "teacher_loss": 0.25207698345184326 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.6049313545227051, + "learning_rate": 1.815154744276343e-05, + "loss": 0.2603, + "step": 15893, + "teacher_loss": 0.2220241129398346 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.3835017681121826, + "learning_rate": 1.8149326852956232e-05, + "loss": 0.2012, + "step": 15894, + "teacher_loss": 0.18099913001060486 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.4521665573120117, + "learning_rate": 1.814710619094441e-05, + "loss": 0.2787, + "step": 15895, + "teacher_loss": 0.2594741880893707 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.17418499290943146, + "learning_rate": 1.8144885456778873e-05, + "loss": 0.1879, + "step": 15896, + "teacher_loss": 0.18939995765686035 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.3316029906272888, + "learning_rate": 1.8142664650510546e-05, + "loss": 0.2851, + "step": 15897, + "teacher_loss": 0.27993136644363403 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.7412432432174683, + "learning_rate": 1.8140443772190344e-05, + "loss": 0.2286, + "step": 15898, + "teacher_loss": 0.17165914177894592 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.23797626793384552, + "learning_rate": 1.8138222821869176e-05, + "loss": 0.1936, + "step": 15899, + "teacher_loss": 0.18867117166519165 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.7001538276672363, + "learning_rate": 1.813600179959797e-05, + "loss": 0.2643, + "step": 15900, + "teacher_loss": 0.21591657400131226 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.3734312951564789, + "learning_rate": 1.8133780705427647e-05, + "loss": 0.2557, + "step": 15901, + "teacher_loss": 0.24259501695632935 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.4090441167354584, + "learning_rate": 1.8131559539409126e-05, + "loss": 0.2703, + "step": 15902, + "teacher_loss": 0.25490033626556396 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.8420337438583374, + "learning_rate": 1.8129338301593335e-05, + "loss": 0.318, + "step": 15903, + "teacher_loss": 0.25973206758499146 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.5946215391159058, + "learning_rate": 1.8127116992031197e-05, + "loss": 0.2694, + "step": 15904, + "teacher_loss": 0.23325365781784058 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.36245644092559814, + "learning_rate": 1.8124895610773645e-05, + "loss": 0.2677, + "step": 15905, + "teacher_loss": 0.25722014904022217 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.9016590118408203, + "learning_rate": 1.81226741578716e-05, + "loss": 0.3018, + "step": 15906, + "teacher_loss": 0.23515459895133972 + }, + { + "compression_loss": 0.0, + "epoch": 2.87, + "label_loss": 0.5252824425697327, + "learning_rate": 1.8120452633376004e-05, + "loss": 0.2709, + "step": 15907, + "teacher_loss": 0.24266907572746277 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.764914870262146, + "learning_rate": 1.8118231037337785e-05, + "loss": 0.4951, + "step": 15908, + "teacher_loss": 0.46513789892196655 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.41471171379089355, + "learning_rate": 1.811600936980787e-05, + "loss": 0.2832, + "step": 15909, + "teacher_loss": 0.2685871720314026 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.23086172342300415, + "learning_rate": 1.811378763083721e-05, + "loss": 0.2129, + "step": 15910, + "teacher_loss": 0.21089771389961243 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.27980712056159973, + "learning_rate": 1.8111565820476732e-05, + "loss": 0.1937, + "step": 15911, + "teacher_loss": 0.1840856969356537 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.4259776473045349, + "learning_rate": 1.8109343938777383e-05, + "loss": 0.2256, + "step": 15912, + "teacher_loss": 0.2032942771911621 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.5708742737770081, + "learning_rate": 1.8107121985790092e-05, + "loss": 0.2301, + "step": 15913, + "teacher_loss": 0.19223767518997192 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.31685516238212585, + "learning_rate": 1.8104899961565816e-05, + "loss": 0.2355, + "step": 15914, + "teacher_loss": 0.226412832736969 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.4490571618080139, + "learning_rate": 1.8102677866155488e-05, + "loss": 0.1976, + "step": 15915, + "teacher_loss": 0.16968148946762085 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.3662424087524414, + "learning_rate": 1.8100455699610065e-05, + "loss": 0.209, + "step": 15916, + "teacher_loss": 0.19152499735355377 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.32432007789611816, + "learning_rate": 1.809823346198048e-05, + "loss": 0.2702, + "step": 15917, + "teacher_loss": 0.2642146348953247 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.4476051330566406, + "learning_rate": 1.8096011153317698e-05, + "loss": 0.2318, + "step": 15918, + "teacher_loss": 0.20779427886009216 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.3224039077758789, + "learning_rate": 1.809378877367266e-05, + "loss": 0.2295, + "step": 15919, + "teacher_loss": 0.21917583048343658 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.2092219591140747, + "learning_rate": 1.8091566323096325e-05, + "loss": 0.1907, + "step": 15920, + "teacher_loss": 0.18868398666381836 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.36096876859664917, + "learning_rate": 1.8089343801639632e-05, + "loss": 0.236, + "step": 15921, + "teacher_loss": 0.22213931381702423 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.4118530750274658, + "learning_rate": 1.808712120935356e-05, + "loss": 0.2229, + "step": 15922, + "teacher_loss": 0.20195114612579346 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.40382254123687744, + "learning_rate": 1.808489854628905e-05, + "loss": 0.2719, + "step": 15923, + "teacher_loss": 0.2572861909866333 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.4486439824104309, + "learning_rate": 1.8082675812497066e-05, + "loss": 0.3284, + "step": 15924, + "teacher_loss": 0.3150879740715027 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.34547877311706543, + "learning_rate": 1.8080453008028567e-05, + "loss": 0.2311, + "step": 15925, + "teacher_loss": 0.2183745801448822 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.2674420475959778, + "learning_rate": 1.8078230132934514e-05, + "loss": 0.2298, + "step": 15926, + "teacher_loss": 0.22563272714614868 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.6948617100715637, + "learning_rate": 1.8076007187265876e-05, + "loss": 0.3094, + "step": 15927, + "teacher_loss": 0.2666250765323639 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.2115933895111084, + "learning_rate": 1.8073784171073617e-05, + "loss": 0.216, + "step": 15928, + "teacher_loss": 0.21645301580429077 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.29225659370422363, + "learning_rate": 1.8071561084408695e-05, + "loss": 0.2151, + "step": 15929, + "teacher_loss": 0.20654284954071045 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.43161699175834656, + "learning_rate": 1.8069337927322097e-05, + "loss": 0.3134, + "step": 15930, + "teacher_loss": 0.30021822452545166 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.5122411847114563, + "learning_rate": 1.8067114699864774e-05, + "loss": 0.229, + "step": 15931, + "teacher_loss": 0.19747330248355865 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.49110111594200134, + "learning_rate": 1.806489140208771e-05, + "loss": 0.2171, + "step": 15932, + "teacher_loss": 0.18667101860046387 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.5662893056869507, + "learning_rate": 1.806266803404188e-05, + "loss": 0.2721, + "step": 15933, + "teacher_loss": 0.2394472360610962 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.6437488794326782, + "learning_rate": 1.8060444595778246e-05, + "loss": 0.272, + "step": 15934, + "teacher_loss": 0.23064376413822174 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.5459778904914856, + "learning_rate": 1.8058221087347803e-05, + "loss": 0.2756, + "step": 15935, + "teacher_loss": 0.24557873606681824 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.43965017795562744, + "learning_rate": 1.805599750880151e-05, + "loss": 0.325, + "step": 15936, + "teacher_loss": 0.3122938573360443 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.135902538895607, + "learning_rate": 1.805377386019036e-05, + "loss": 0.306, + "step": 15937, + "teacher_loss": 0.32492056488990784 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.4700288772583008, + "learning_rate": 1.805155014156533e-05, + "loss": 0.3548, + "step": 15938, + "teacher_loss": 0.342024564743042 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.5970551371574402, + "learning_rate": 1.8049326352977404e-05, + "loss": 0.2825, + "step": 15939, + "teacher_loss": 0.24754837155342102 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.3149511218070984, + "learning_rate": 1.804710249447757e-05, + "loss": 0.1341, + "step": 15940, + "teacher_loss": 0.11395937949419022 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.5430590510368347, + "learning_rate": 1.804487856611681e-05, + "loss": 0.2289, + "step": 15941, + "teacher_loss": 0.1939672827720642 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.33586010336875916, + "learning_rate": 1.804265456794611e-05, + "loss": 0.2516, + "step": 15942, + "teacher_loss": 0.24219557642936707 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.17955368757247925, + "learning_rate": 1.804043050001647e-05, + "loss": 0.1948, + "step": 15943, + "teacher_loss": 0.1964605450630188 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.45494702458381653, + "learning_rate": 1.8038206362378868e-05, + "loss": 0.2111, + "step": 15944, + "teacher_loss": 0.18398284912109375 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.5782331228256226, + "learning_rate": 1.8035982155084308e-05, + "loss": 0.2974, + "step": 15945, + "teacher_loss": 0.26622846722602844 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.34627142548561096, + "learning_rate": 1.8033757878183776e-05, + "loss": 0.1794, + "step": 15946, + "teacher_loss": 0.16088120639324188 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.8087563514709473, + "learning_rate": 1.8031533531728272e-05, + "loss": 0.3477, + "step": 15947, + "teacher_loss": 0.29646411538124084 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.42827725410461426, + "learning_rate": 1.8029309115768796e-05, + "loss": 0.2148, + "step": 15948, + "teacher_loss": 0.19104623794555664 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.2689998745918274, + "learning_rate": 1.8027084630356344e-05, + "loss": 0.1968, + "step": 15949, + "teacher_loss": 0.18875108659267426 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.6298221945762634, + "learning_rate": 1.8024860075541913e-05, + "loss": 0.342, + "step": 15950, + "teacher_loss": 0.3099777400493622 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.8050061464309692, + "learning_rate": 1.802263545137651e-05, + "loss": 0.2598, + "step": 15951, + "teacher_loss": 0.199222594499588 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.926358699798584, + "learning_rate": 1.802041075791114e-05, + "loss": 0.3293, + "step": 15952, + "teacher_loss": 0.2629215717315674 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.4693329632282257, + "learning_rate": 1.801818599519681e-05, + "loss": 0.2656, + "step": 15953, + "teacher_loss": 0.24299326539039612 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 1.0085959434509277, + "learning_rate": 1.801596116328452e-05, + "loss": 0.434, + "step": 15954, + "teacher_loss": 0.3701905608177185 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.4580909311771393, + "learning_rate": 1.8013736262225285e-05, + "loss": 0.2974, + "step": 15955, + "teacher_loss": 0.27949029207229614 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.8941679000854492, + "learning_rate": 1.801151129207011e-05, + "loss": 0.3346, + "step": 15956, + "teacher_loss": 0.2723815441131592 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.32722964882850647, + "learning_rate": 1.8009286252870013e-05, + "loss": 0.2254, + "step": 15957, + "teacher_loss": 0.21410246193408966 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.2847384810447693, + "learning_rate": 1.8007061144676005e-05, + "loss": 0.1826, + "step": 15958, + "teacher_loss": 0.1712675541639328 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.4693653881549835, + "learning_rate": 1.8004835967539098e-05, + "loss": 0.235, + "step": 15959, + "teacher_loss": 0.20900863409042358 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 1.1480679512023926, + "learning_rate": 1.8002610721510315e-05, + "loss": 0.3061, + "step": 15960, + "teacher_loss": 0.2125958949327469 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.7316836714744568, + "learning_rate": 1.8000385406640667e-05, + "loss": 0.3188, + "step": 15961, + "teacher_loss": 0.2729114890098572 + }, + { + "compression_loss": 0.0, + "epoch": 2.88, + "label_loss": 0.33481651544570923, + "learning_rate": 1.7998160022981177e-05, + "loss": 0.1959, + "step": 15962, + "teacher_loss": 0.18050611019134521 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.3492404818534851, + "learning_rate": 1.7995934570582872e-05, + "loss": 0.297, + "step": 15963, + "teacher_loss": 0.29121989011764526 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.6515673995018005, + "learning_rate": 1.7993709049496765e-05, + "loss": 0.3738, + "step": 15964, + "teacher_loss": 0.342968225479126 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.24512922763824463, + "learning_rate": 1.7991483459773887e-05, + "loss": 0.2537, + "step": 15965, + "teacher_loss": 0.25464287400245667 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.24002496898174286, + "learning_rate": 1.798925780146526e-05, + "loss": 0.2675, + "step": 15966, + "teacher_loss": 0.2706074118614197 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.32068461179733276, + "learning_rate": 1.798703207462191e-05, + "loss": 0.2741, + "step": 15967, + "teacher_loss": 0.26891613006591797 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.38085508346557617, + "learning_rate": 1.798480627929488e-05, + "loss": 0.2742, + "step": 15968, + "teacher_loss": 0.26235491037368774 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.3184136748313904, + "learning_rate": 1.7982580415535182e-05, + "loss": 0.3242, + "step": 15969, + "teacher_loss": 0.3248189687728882 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.5267320871353149, + "learning_rate": 1.7980354483393865e-05, + "loss": 0.2546, + "step": 15970, + "teacher_loss": 0.22439610958099365 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.5789691805839539, + "learning_rate": 1.797812848292195e-05, + "loss": 0.2764, + "step": 15971, + "teacher_loss": 0.24276217818260193 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.19721221923828125, + "learning_rate": 1.797590241417048e-05, + "loss": 0.224, + "step": 15972, + "teacher_loss": 0.22701910138130188 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 1.1301004886627197, + "learning_rate": 1.7973676277190486e-05, + "loss": 0.2947, + "step": 15973, + "teacher_loss": 0.20184342563152313 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.7524248957633972, + "learning_rate": 1.7971450072033016e-05, + "loss": 0.3497, + "step": 15974, + "teacher_loss": 0.30495405197143555 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.3591300845146179, + "learning_rate": 1.79692237987491e-05, + "loss": 0.2108, + "step": 15975, + "teacher_loss": 0.19436979293823242 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.16149577498435974, + "learning_rate": 1.796699745738979e-05, + "loss": 0.2423, + "step": 15976, + "teacher_loss": 0.25131040811538696 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.3153558671474457, + "learning_rate": 1.7964771048006116e-05, + "loss": 0.2446, + "step": 15977, + "teacher_loss": 0.2367090880870819 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.26709863543510437, + "learning_rate": 1.796254457064914e-05, + "loss": 0.2734, + "step": 15978, + "teacher_loss": 0.2741333246231079 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.7244864106178284, + "learning_rate": 1.7960318025369894e-05, + "loss": 0.286, + "step": 15979, + "teacher_loss": 0.23732250928878784 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.9245574474334717, + "learning_rate": 1.795809141221943e-05, + "loss": 0.3033, + "step": 15980, + "teacher_loss": 0.23424173891544342 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.46007609367370605, + "learning_rate": 1.7955864731248802e-05, + "loss": 0.2898, + "step": 15981, + "teacher_loss": 0.2708512544631958 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.15726551413536072, + "learning_rate": 1.7953637982509055e-05, + "loss": 0.175, + "step": 15982, + "teacher_loss": 0.1769482046365738 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.4426161050796509, + "learning_rate": 1.7951411166051247e-05, + "loss": 0.2501, + "step": 15983, + "teacher_loss": 0.22872647643089294 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.38759392499923706, + "learning_rate": 1.7949184281926426e-05, + "loss": 0.2208, + "step": 15984, + "teacher_loss": 0.20229625701904297 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 1.1463823318481445, + "learning_rate": 1.7946957330185656e-05, + "loss": 0.3437, + "step": 15985, + "teacher_loss": 0.2545274496078491 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.2256568968296051, + "learning_rate": 1.7944730310879987e-05, + "loss": 0.1881, + "step": 15986, + "teacher_loss": 0.18394413590431213 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.62260901927948, + "learning_rate": 1.794250322406048e-05, + "loss": 0.2296, + "step": 15987, + "teacher_loss": 0.18590421974658966 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.707524299621582, + "learning_rate": 1.79402760697782e-05, + "loss": 0.3909, + "step": 15988, + "teacher_loss": 0.3557557761669159 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.498782753944397, + "learning_rate": 1.79380488480842e-05, + "loss": 0.2433, + "step": 15989, + "teacher_loss": 0.21486930549144745 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.6074224710464478, + "learning_rate": 1.7935821559029554e-05, + "loss": 0.3893, + "step": 15990, + "teacher_loss": 0.36505138874053955 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.42969998717308044, + "learning_rate": 1.793359420266532e-05, + "loss": 0.2366, + "step": 15991, + "teacher_loss": 0.21510009467601776 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.82274329662323, + "learning_rate": 1.793136677904256e-05, + "loss": 0.4297, + "step": 15992, + "teacher_loss": 0.3860262334346771 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.3003634512424469, + "learning_rate": 1.7929139288212363e-05, + "loss": 0.2321, + "step": 15993, + "teacher_loss": 0.22450459003448486 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.6007168292999268, + "learning_rate": 1.7926911730225772e-05, + "loss": 0.387, + "step": 15994, + "teacher_loss": 0.36320769786834717 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.40766245126724243, + "learning_rate": 1.7924684105133873e-05, + "loss": 0.2391, + "step": 15995, + "teacher_loss": 0.22042471170425415 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.5066090822219849, + "learning_rate": 1.792245641298774e-05, + "loss": 0.2717, + "step": 15996, + "teacher_loss": 0.24564197659492493 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.5403831601142883, + "learning_rate": 1.7920228653838435e-05, + "loss": 0.4249, + "step": 15997, + "teacher_loss": 0.4120757579803467 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.24504396319389343, + "learning_rate": 1.7918000827737052e-05, + "loss": 0.2568, + "step": 15998, + "teacher_loss": 0.25805264711380005 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.20005804300308228, + "learning_rate": 1.7915772934734653e-05, + "loss": 0.2389, + "step": 15999, + "teacher_loss": 0.24320833384990692 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.6726173162460327, + "learning_rate": 1.7913544974882322e-05, + "loss": 0.3026, + "step": 16000, + "teacher_loss": 0.26144686341285706 + }, + { + "epoch": 2.89, + "eval_exact_match": 79.8864711447493, + "eval_f1": 87.51815242249732, + "step": 16000 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.914963960647583, + "learning_rate": 1.7911316948231144e-05, + "loss": 0.5497, + "step": 16001, + "teacher_loss": 0.5090828537940979 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.2682403326034546, + "learning_rate": 1.790908885483219e-05, + "loss": 0.2327, + "step": 16002, + "teacher_loss": 0.22879105806350708 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.45626530051231384, + "learning_rate": 1.7906860694736556e-05, + "loss": 0.2172, + "step": 16003, + "teacher_loss": 0.19069020450115204 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.2516730725765228, + "learning_rate": 1.7904632467995324e-05, + "loss": 0.2615, + "step": 16004, + "teacher_loss": 0.26263898611068726 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.7904136180877686, + "learning_rate": 1.7902404174659574e-05, + "loss": 0.3381, + "step": 16005, + "teacher_loss": 0.2878707945346832 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.3381238281726837, + "learning_rate": 1.79001758147804e-05, + "loss": 0.3125, + "step": 16006, + "teacher_loss": 0.30966639518737793 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.20560550689697266, + "learning_rate": 1.7897947388408885e-05, + "loss": 0.1979, + "step": 16007, + "teacher_loss": 0.1970043033361435 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.7509417533874512, + "learning_rate": 1.7895718895596126e-05, + "loss": 0.3176, + "step": 16008, + "teacher_loss": 0.26940953731536865 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 1.0460160970687866, + "learning_rate": 1.789349033639322e-05, + "loss": 0.304, + "step": 16009, + "teacher_loss": 0.22155362367630005 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.5968755483627319, + "learning_rate": 1.789126171085125e-05, + "loss": 0.3314, + "step": 16010, + "teacher_loss": 0.3018585443496704 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.44796669483184814, + "learning_rate": 1.788903301902132e-05, + "loss": 0.234, + "step": 16011, + "teacher_loss": 0.21017369627952576 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.18092384934425354, + "learning_rate": 1.7886804260954528e-05, + "loss": 0.1869, + "step": 16012, + "teacher_loss": 0.18758562207221985 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.15488232672214508, + "learning_rate": 1.7884575436701964e-05, + "loss": 0.1648, + "step": 16013, + "teacher_loss": 0.16591133177280426 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 1.0253793001174927, + "learning_rate": 1.7882346546314735e-05, + "loss": 0.5451, + "step": 16014, + "teacher_loss": 0.49170827865600586 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.4046880602836609, + "learning_rate": 1.788011758984394e-05, + "loss": 0.3106, + "step": 16015, + "teacher_loss": 0.3000958263874054 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.06968683004379272, + "learning_rate": 1.7877888567340687e-05, + "loss": 0.1745, + "step": 16016, + "teacher_loss": 0.1861688494682312 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.6674351692199707, + "learning_rate": 1.7875659478856077e-05, + "loss": 0.446, + "step": 16017, + "teacher_loss": 0.42139798402786255 + }, + { + "compression_loss": 0.0, + "epoch": 2.89, + "label_loss": 0.5845698714256287, + "learning_rate": 1.7873430324441218e-05, + "loss": 0.2796, + "step": 16018, + "teacher_loss": 0.2457694709300995 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.43772268295288086, + "learning_rate": 1.7871201104147212e-05, + "loss": 0.2538, + "step": 16019, + "teacher_loss": 0.23335182666778564 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.2942679524421692, + "learning_rate": 1.7868971818025178e-05, + "loss": 0.3938, + "step": 16020, + "teacher_loss": 0.40480470657348633 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.27934524416923523, + "learning_rate": 1.7866742466126216e-05, + "loss": 0.1647, + "step": 16021, + "teacher_loss": 0.1520131230354309 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 1.0127445459365845, + "learning_rate": 1.7864513048501446e-05, + "loss": 0.5147, + "step": 16022, + "teacher_loss": 0.4593789577484131 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.2737566828727722, + "learning_rate": 1.786228356520199e-05, + "loss": 0.2658, + "step": 16023, + "teacher_loss": 0.26493918895721436 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.8956804871559143, + "learning_rate": 1.7860054016278943e-05, + "loss": 0.3444, + "step": 16024, + "teacher_loss": 0.2831432819366455 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.32857370376586914, + "learning_rate": 1.785782440178343e-05, + "loss": 0.2529, + "step": 16025, + "teacher_loss": 0.2444915771484375 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.437171995639801, + "learning_rate": 1.785559472176658e-05, + "loss": 0.2524, + "step": 16026, + "teacher_loss": 0.23187844455242157 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 1.0220277309417725, + "learning_rate": 1.7853364976279498e-05, + "loss": 0.2716, + "step": 16027, + "teacher_loss": 0.1882309764623642 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.3576350808143616, + "learning_rate": 1.7851135165373322e-05, + "loss": 0.1813, + "step": 16028, + "teacher_loss": 0.1616886556148529 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.45193690061569214, + "learning_rate": 1.784890528909915e-05, + "loss": 0.2437, + "step": 16029, + "teacher_loss": 0.22054174542427063 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.5340882539749146, + "learning_rate": 1.784667534750813e-05, + "loss": 0.3684, + "step": 16030, + "teacher_loss": 0.3499506115913391 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.5889751315116882, + "learning_rate": 1.784444534065138e-05, + "loss": 0.2103, + "step": 16031, + "teacher_loss": 0.16824448108673096 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.2519285976886749, + "learning_rate": 1.7842215268580024e-05, + "loss": 0.1661, + "step": 16032, + "teacher_loss": 0.1565803438425064 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.17461583018302917, + "learning_rate": 1.783998513134519e-05, + "loss": 0.1815, + "step": 16033, + "teacher_loss": 0.18231001496315002 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.7595190405845642, + "learning_rate": 1.7837754928998016e-05, + "loss": 0.3799, + "step": 16034, + "teacher_loss": 0.33769965171813965 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.19194740056991577, + "learning_rate": 1.783552466158963e-05, + "loss": 0.1847, + "step": 16035, + "teacher_loss": 0.18393424153327942 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.497952401638031, + "learning_rate": 1.7833294329171163e-05, + "loss": 0.3125, + "step": 16036, + "teacher_loss": 0.29184794425964355 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.41504085063934326, + "learning_rate": 1.783106393179375e-05, + "loss": 0.3834, + "step": 16037, + "teacher_loss": 0.37989360094070435 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.4221075773239136, + "learning_rate": 1.782883346950853e-05, + "loss": 0.2627, + "step": 16038, + "teacher_loss": 0.24500735104084015 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 1.0650999546051025, + "learning_rate": 1.7826602942366643e-05, + "loss": 0.2836, + "step": 16039, + "teacher_loss": 0.1967909038066864 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.4444456398487091, + "learning_rate": 1.7824372350419223e-05, + "loss": 0.3626, + "step": 16040, + "teacher_loss": 0.35346710681915283 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.5500121116638184, + "learning_rate": 1.782214169371741e-05, + "loss": 0.2662, + "step": 16041, + "teacher_loss": 0.23464587330818176 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.29292920231819153, + "learning_rate": 1.7819910972312355e-05, + "loss": 0.25, + "step": 16042, + "teacher_loss": 0.24519295990467072 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.2686951160430908, + "learning_rate": 1.7817680186255195e-05, + "loss": 0.2027, + "step": 16043, + "teacher_loss": 0.19531291723251343 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.34421679377555847, + "learning_rate": 1.781544933559707e-05, + "loss": 0.3148, + "step": 16044, + "teacher_loss": 0.3114929795265198 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.4632073938846588, + "learning_rate": 1.781321842038914e-05, + "loss": 0.2803, + "step": 16045, + "teacher_loss": 0.2600013017654419 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.7540364861488342, + "learning_rate": 1.7810987440682545e-05, + "loss": 0.2989, + "step": 16046, + "teacher_loss": 0.24829471111297607 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.43955904245376587, + "learning_rate": 1.7808756396528433e-05, + "loss": 0.2104, + "step": 16047, + "teacher_loss": 0.18499056994915009 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.34506499767303467, + "learning_rate": 1.780652528797796e-05, + "loss": 0.2615, + "step": 16048, + "teacher_loss": 0.25218769907951355 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.5236640572547913, + "learning_rate": 1.7804294115082272e-05, + "loss": 0.2305, + "step": 16049, + "teacher_loss": 0.19791346788406372 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.7024568319320679, + "learning_rate": 1.7802062877892532e-05, + "loss": 0.253, + "step": 16050, + "teacher_loss": 0.20302355289459229 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.45694369077682495, + "learning_rate": 1.779983157645989e-05, + "loss": 0.2481, + "step": 16051, + "teacher_loss": 0.22492089867591858 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.815610408782959, + "learning_rate": 1.7797600210835507e-05, + "loss": 0.4499, + "step": 16052, + "teacher_loss": 0.4092215895652771 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.32092657685279846, + "learning_rate": 1.7795368781070536e-05, + "loss": 0.1856, + "step": 16053, + "teacher_loss": 0.17059555649757385 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.3048647344112396, + "learning_rate": 1.779313728721614e-05, + "loss": 0.1897, + "step": 16054, + "teacher_loss": 0.17687265574932098 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.4967675805091858, + "learning_rate": 1.7790905729323482e-05, + "loss": 0.256, + "step": 16055, + "teacher_loss": 0.2292841374874115 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.19485768675804138, + "learning_rate": 1.778867410744372e-05, + "loss": 0.2138, + "step": 16056, + "teacher_loss": 0.2158549427986145 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.5421093106269836, + "learning_rate": 1.778644242162802e-05, + "loss": 0.2159, + "step": 16057, + "teacher_loss": 0.17962437868118286 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.9199850559234619, + "learning_rate": 1.778421067192756e-05, + "loss": 0.4391, + "step": 16058, + "teacher_loss": 0.38563990592956543 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.18647682666778564, + "learning_rate": 1.7781978858393485e-05, + "loss": 0.1842, + "step": 16059, + "teacher_loss": 0.18389815092086792 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.5808572769165039, + "learning_rate": 1.777974698107698e-05, + "loss": 0.4089, + "step": 16060, + "teacher_loss": 0.38982248306274414 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.2736385762691498, + "learning_rate": 1.7777515040029213e-05, + "loss": 0.1812, + "step": 16061, + "teacher_loss": 0.17098172008991241 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 1.1857458353042603, + "learning_rate": 1.777528303530135e-05, + "loss": 0.3164, + "step": 16062, + "teacher_loss": 0.21980780363082886 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.25786224007606506, + "learning_rate": 1.777305096694457e-05, + "loss": 0.3953, + "step": 16063, + "teacher_loss": 0.4106091856956482 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.4659283757209778, + "learning_rate": 1.7770818835010045e-05, + "loss": 0.304, + "step": 16064, + "teacher_loss": 0.28598618507385254 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.3318653702735901, + "learning_rate": 1.7768586639548947e-05, + "loss": 0.2172, + "step": 16065, + "teacher_loss": 0.20446240901947021 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.6898326873779297, + "learning_rate": 1.7766354380612463e-05, + "loss": 0.2831, + "step": 16066, + "teacher_loss": 0.23785468935966492 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.7938921451568604, + "learning_rate": 1.776412205825177e-05, + "loss": 0.4072, + "step": 16067, + "teacher_loss": 0.3641902804374695 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 1.0556951761245728, + "learning_rate": 1.7761889672518038e-05, + "loss": 0.4154, + "step": 16068, + "teacher_loss": 0.3442896604537964 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.3701055645942688, + "learning_rate": 1.775965722346246e-05, + "loss": 0.2603, + "step": 16069, + "teacher_loss": 0.24807268381118774 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.477749764919281, + "learning_rate": 1.7757424711136217e-05, + "loss": 0.2776, + "step": 16070, + "teacher_loss": 0.2553953528404236 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.6850447058677673, + "learning_rate": 1.775519213559049e-05, + "loss": 0.477, + "step": 16071, + "teacher_loss": 0.45388537645339966 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.2957621216773987, + "learning_rate": 1.775295949687647e-05, + "loss": 0.224, + "step": 16072, + "teacher_loss": 0.21604913473129272 + }, + { + "compression_loss": 0.0, + "epoch": 2.9, + "label_loss": 0.4405668377876282, + "learning_rate": 1.7750726795045345e-05, + "loss": 0.2589, + "step": 16073, + "teacher_loss": 0.23867294192314148 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.3325265347957611, + "learning_rate": 1.7748494030148296e-05, + "loss": 0.292, + "step": 16074, + "teacher_loss": 0.28747135400772095 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.3731164038181305, + "learning_rate": 1.7746261202236522e-05, + "loss": 0.3192, + "step": 16075, + "teacher_loss": 0.3132528066635132 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.8894391655921936, + "learning_rate": 1.7744028311361212e-05, + "loss": 0.3906, + "step": 16076, + "teacher_loss": 0.3351427912712097 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.22205477952957153, + "learning_rate": 1.7741795357573556e-05, + "loss": 0.1731, + "step": 16077, + "teacher_loss": 0.16768944263458252 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.42038851976394653, + "learning_rate": 1.7739562340924762e-05, + "loss": 0.2521, + "step": 16078, + "teacher_loss": 0.23342561721801758 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.568663477897644, + "learning_rate": 1.773732926146601e-05, + "loss": 0.3841, + "step": 16079, + "teacher_loss": 0.36354494094848633 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.8479280471801758, + "learning_rate": 1.7735096119248505e-05, + "loss": 0.3881, + "step": 16080, + "teacher_loss": 0.3370633125305176 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.36185094714164734, + "learning_rate": 1.7732862914323447e-05, + "loss": 0.292, + "step": 16081, + "teacher_loss": 0.28420397639274597 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.37595734000205994, + "learning_rate": 1.7730629646742038e-05, + "loss": 0.2051, + "step": 16082, + "teacher_loss": 0.18616746366024017 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.33760541677474976, + "learning_rate": 1.772839631655548e-05, + "loss": 0.2243, + "step": 16083, + "teacher_loss": 0.21172216534614563 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.3132728636264801, + "learning_rate": 1.772616292381497e-05, + "loss": 0.233, + "step": 16084, + "teacher_loss": 0.2240440845489502 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.5714585781097412, + "learning_rate": 1.772392946857172e-05, + "loss": 0.3657, + "step": 16085, + "teacher_loss": 0.34282103180885315 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.30047088861465454, + "learning_rate": 1.7721695950876937e-05, + "loss": 0.2521, + "step": 16086, + "teacher_loss": 0.246731236577034 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.5033674240112305, + "learning_rate": 1.7719462370781816e-05, + "loss": 0.2638, + "step": 16087, + "teacher_loss": 0.23716311156749725 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.25135719776153564, + "learning_rate": 1.7717228728337587e-05, + "loss": 0.2415, + "step": 16088, + "teacher_loss": 0.24039265513420105 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.28981077671051025, + "learning_rate": 1.7714995023595443e-05, + "loss": 0.1837, + "step": 16089, + "teacher_loss": 0.17186924815177917 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.41580283641815186, + "learning_rate": 1.7712761256606603e-05, + "loss": 0.2221, + "step": 16090, + "teacher_loss": 0.20057857036590576 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.31485021114349365, + "learning_rate": 1.7710527427422285e-05, + "loss": 0.2584, + "step": 16091, + "teacher_loss": 0.252141535282135 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.5772066712379456, + "learning_rate": 1.7708293536093697e-05, + "loss": 0.4929, + "step": 16092, + "teacher_loss": 0.4835689067840576 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.46490949392318726, + "learning_rate": 1.7706059582672058e-05, + "loss": 0.2385, + "step": 16093, + "teacher_loss": 0.21330301463603973 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.2244323343038559, + "learning_rate": 1.770382556720859e-05, + "loss": 0.2339, + "step": 16094, + "teacher_loss": 0.23494362831115723 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.3616204261779785, + "learning_rate": 1.7701591489754503e-05, + "loss": 0.2179, + "step": 16095, + "teacher_loss": 0.20198285579681396 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.43935900926589966, + "learning_rate": 1.7699357350361024e-05, + "loss": 0.4883, + "step": 16096, + "teacher_loss": 0.4937725067138672 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.8279876708984375, + "learning_rate": 1.7697123149079374e-05, + "loss": 0.3442, + "step": 16097, + "teacher_loss": 0.29044121503829956 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.5152071118354797, + "learning_rate": 1.7694888885960782e-05, + "loss": 0.3062, + "step": 16098, + "teacher_loss": 0.28297024965286255 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.10091327875852585, + "learning_rate": 1.769265456105646e-05, + "loss": 0.1812, + "step": 16099, + "teacher_loss": 0.19010859727859497 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.9394775032997131, + "learning_rate": 1.7690420174417644e-05, + "loss": 0.3604, + "step": 16100, + "teacher_loss": 0.29601871967315674 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.6475400924682617, + "learning_rate": 1.7688185726095562e-05, + "loss": 0.2933, + "step": 16101, + "teacher_loss": 0.2539290487766266 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.8079171180725098, + "learning_rate": 1.7685951216141438e-05, + "loss": 0.5045, + "step": 16102, + "teacher_loss": 0.47076326608657837 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.261150985956192, + "learning_rate": 1.7683716644606508e-05, + "loss": 0.1593, + "step": 16103, + "teacher_loss": 0.1479683816432953 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.2505064606666565, + "learning_rate": 1.7681482011541997e-05, + "loss": 0.2793, + "step": 16104, + "teacher_loss": 0.28248023986816406 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.6228090524673462, + "learning_rate": 1.7679247316999153e-05, + "loss": 0.4014, + "step": 16105, + "teacher_loss": 0.3767520487308502 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.6806678175926208, + "learning_rate": 1.7677012561029193e-05, + "loss": 0.2471, + "step": 16106, + "teacher_loss": 0.1989212930202484 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.47563856840133667, + "learning_rate": 1.767477774368336e-05, + "loss": 0.4794, + "step": 16107, + "teacher_loss": 0.4798167943954468 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.7107974290847778, + "learning_rate": 1.7672542865012898e-05, + "loss": 0.3072, + "step": 16108, + "teacher_loss": 0.26232224702835083 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.5897939801216125, + "learning_rate": 1.7670307925069037e-05, + "loss": 0.2354, + "step": 16109, + "teacher_loss": 0.19600075483322144 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.5974275469779968, + "learning_rate": 1.766807292390302e-05, + "loss": 0.3761, + "step": 16110, + "teacher_loss": 0.35150012373924255 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.596744179725647, + "learning_rate": 1.7665837861566095e-05, + "loss": 0.2724, + "step": 16111, + "teacher_loss": 0.23638194799423218 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.24147331714630127, + "learning_rate": 1.7663602738109498e-05, + "loss": 0.2063, + "step": 16112, + "teacher_loss": 0.20243993401527405 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.12071894109249115, + "learning_rate": 1.7661367553584474e-05, + "loss": 0.1478, + "step": 16113, + "teacher_loss": 0.15078537166118622 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.499862939119339, + "learning_rate": 1.7659132308042277e-05, + "loss": 0.232, + "step": 16114, + "teacher_loss": 0.20223723351955414 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.3025949001312256, + "learning_rate": 1.765689700153414e-05, + "loss": 0.1934, + "step": 16115, + "teacher_loss": 0.18127796053886414 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.464596688747406, + "learning_rate": 1.765466163411133e-05, + "loss": 0.2866, + "step": 16116, + "teacher_loss": 0.26687300205230713 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.6942428350448608, + "learning_rate": 1.765242620582508e-05, + "loss": 0.3075, + "step": 16117, + "teacher_loss": 0.2645556926727295 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.9362626075744629, + "learning_rate": 1.7650190716726654e-05, + "loss": 0.3366, + "step": 16118, + "teacher_loss": 0.27001315355300903 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.35826563835144043, + "learning_rate": 1.7647955166867298e-05, + "loss": 0.2517, + "step": 16119, + "teacher_loss": 0.23980633914470673 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.41671645641326904, + "learning_rate": 1.764571955629827e-05, + "loss": 0.3084, + "step": 16120, + "teacher_loss": 0.29631686210632324 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.5379695296287537, + "learning_rate": 1.7643483885070827e-05, + "loss": 0.3412, + "step": 16121, + "teacher_loss": 0.31934288144111633 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.29966095089912415, + "learning_rate": 1.7641248153236223e-05, + "loss": 0.268, + "step": 16122, + "teacher_loss": 0.2645174264907837 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.2360333800315857, + "learning_rate": 1.7639012360845717e-05, + "loss": 0.2522, + "step": 16123, + "teacher_loss": 0.2539408802986145 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.3037000894546509, + "learning_rate": 1.763677650795057e-05, + "loss": 0.3281, + "step": 16124, + "teacher_loss": 0.330766499042511 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.646991491317749, + "learning_rate": 1.763454059460204e-05, + "loss": 0.3752, + "step": 16125, + "teacher_loss": 0.3450066149234772 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.4843207001686096, + "learning_rate": 1.7632304620851398e-05, + "loss": 0.2651, + "step": 16126, + "teacher_loss": 0.24073311686515808 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.28041505813598633, + "learning_rate": 1.76300685867499e-05, + "loss": 0.2381, + "step": 16127, + "teacher_loss": 0.23344899713993073 + }, + { + "compression_loss": 0.0, + "epoch": 2.91, + "label_loss": 0.3682040572166443, + "learning_rate": 1.762783249234882e-05, + "loss": 0.232, + "step": 16128, + "teacher_loss": 0.21683257818222046 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.26198697090148926, + "learning_rate": 1.762559633769941e-05, + "loss": 0.277, + "step": 16129, + "teacher_loss": 0.27867424488067627 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.19668696820735931, + "learning_rate": 1.7623360122852953e-05, + "loss": 0.2147, + "step": 16130, + "teacher_loss": 0.21672853827476501 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.21880286931991577, + "learning_rate": 1.7621123847860715e-05, + "loss": 0.2192, + "step": 16131, + "teacher_loss": 0.21927449107170105 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.34071192145347595, + "learning_rate": 1.7618887512773966e-05, + "loss": 0.2158, + "step": 16132, + "teacher_loss": 0.20189067721366882 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.8398120403289795, + "learning_rate": 1.761665111764398e-05, + "loss": 0.3458, + "step": 16133, + "teacher_loss": 0.2908574342727661 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.5805809497833252, + "learning_rate": 1.761441466252202e-05, + "loss": 0.2841, + "step": 16134, + "teacher_loss": 0.2511390447616577 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.24854423105716705, + "learning_rate": 1.7612178147459377e-05, + "loss": 0.2268, + "step": 16135, + "teacher_loss": 0.224374458193779 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.9784622192382812, + "learning_rate": 1.760994157250732e-05, + "loss": 0.3885, + "step": 16136, + "teacher_loss": 0.32300254702568054 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.3983399271965027, + "learning_rate": 1.7607704937717123e-05, + "loss": 0.3277, + "step": 16137, + "teacher_loss": 0.31987595558166504 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.4180619716644287, + "learning_rate": 1.7605468243140074e-05, + "loss": 0.3356, + "step": 16138, + "teacher_loss": 0.3264451026916504 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.7414376735687256, + "learning_rate": 1.7603231488827453e-05, + "loss": 0.4018, + "step": 16139, + "teacher_loss": 0.36406221985816956 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.3331107497215271, + "learning_rate": 1.7600994674830536e-05, + "loss": 0.2841, + "step": 16140, + "teacher_loss": 0.2786262035369873 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.44286176562309265, + "learning_rate": 1.7598757801200612e-05, + "loss": 0.3226, + "step": 16141, + "teacher_loss": 0.309234619140625 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.20023499429225922, + "learning_rate": 1.7596520867988957e-05, + "loss": 0.2079, + "step": 16142, + "teacher_loss": 0.20873233675956726 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.532241940498352, + "learning_rate": 1.7594283875246868e-05, + "loss": 0.3196, + "step": 16143, + "teacher_loss": 0.2960240840911865 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.7308896780014038, + "learning_rate": 1.7592046823025627e-05, + "loss": 0.406, + "step": 16144, + "teacher_loss": 0.36990946531295776 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.29296427965164185, + "learning_rate": 1.758980971137652e-05, + "loss": 0.3111, + "step": 16145, + "teacher_loss": 0.3130940794944763 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.6873419880867004, + "learning_rate": 1.758757254035085e-05, + "loss": 0.2975, + "step": 16146, + "teacher_loss": 0.2541968524456024 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.5063971877098083, + "learning_rate": 1.758533530999989e-05, + "loss": 0.3331, + "step": 16147, + "teacher_loss": 0.3137899339199066 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.43581122159957886, + "learning_rate": 1.7583098020374946e-05, + "loss": 0.2708, + "step": 16148, + "teacher_loss": 0.25241971015930176 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.6329385042190552, + "learning_rate": 1.758086067152731e-05, + "loss": 0.2483, + "step": 16149, + "teacher_loss": 0.20553848147392273 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.22338402271270752, + "learning_rate": 1.7578623263508275e-05, + "loss": 0.2685, + "step": 16150, + "teacher_loss": 0.2735450565814972 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.30802640318870544, + "learning_rate": 1.7576385796369144e-05, + "loss": 0.1825, + "step": 16151, + "teacher_loss": 0.16849884390830994 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.1767214834690094, + "learning_rate": 1.7574148270161206e-05, + "loss": 0.2979, + "step": 16152, + "teacher_loss": 0.31136196851730347 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.3767133951187134, + "learning_rate": 1.757191068493577e-05, + "loss": 0.2479, + "step": 16153, + "teacher_loss": 0.23358631134033203 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.7234150171279907, + "learning_rate": 1.7569673040744126e-05, + "loss": 0.4466, + "step": 16154, + "teacher_loss": 0.41580742597579956 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.4831986427307129, + "learning_rate": 1.7567435337637588e-05, + "loss": 0.2167, + "step": 16155, + "teacher_loss": 0.1871432214975357 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.3983255624771118, + "learning_rate": 1.7565197575667458e-05, + "loss": 0.2861, + "step": 16156, + "teacher_loss": 0.27364423871040344 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.5769867897033691, + "learning_rate": 1.7562959754885035e-05, + "loss": 0.4258, + "step": 16157, + "teacher_loss": 0.409015417098999 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.6339594125747681, + "learning_rate": 1.7560721875341632e-05, + "loss": 0.445, + "step": 16158, + "teacher_loss": 0.42395538091659546 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.26187747716903687, + "learning_rate": 1.7558483937088546e-05, + "loss": 0.1517, + "step": 16159, + "teacher_loss": 0.1394776701927185 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.6097872257232666, + "learning_rate": 1.75562459401771e-05, + "loss": 0.2761, + "step": 16160, + "teacher_loss": 0.23902052640914917 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.6100024580955505, + "learning_rate": 1.7554007884658602e-05, + "loss": 0.2712, + "step": 16161, + "teacher_loss": 0.23352715373039246 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.497979998588562, + "learning_rate": 1.7551769770584357e-05, + "loss": 0.2614, + "step": 16162, + "teacher_loss": 0.23510530591011047 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.9240494966506958, + "learning_rate": 1.7549531598005678e-05, + "loss": 0.2771, + "step": 16163, + "teacher_loss": 0.20525413751602173 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.31376272439956665, + "learning_rate": 1.7547293366973885e-05, + "loss": 0.2234, + "step": 16164, + "teacher_loss": 0.21331624686717987 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.45483916997909546, + "learning_rate": 1.75450550775403e-05, + "loss": 0.2459, + "step": 16165, + "teacher_loss": 0.22266817092895508 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.442954421043396, + "learning_rate": 1.7542816729756226e-05, + "loss": 0.2633, + "step": 16166, + "teacher_loss": 0.24332848191261292 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.4294344186782837, + "learning_rate": 1.7540578323672986e-05, + "loss": 0.352, + "step": 16167, + "teacher_loss": 0.34341752529144287 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.39837056398391724, + "learning_rate": 1.7538339859341902e-05, + "loss": 0.2946, + "step": 16168, + "teacher_loss": 0.28304722905158997 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.23290793597698212, + "learning_rate": 1.7536101336814302e-05, + "loss": 0.1838, + "step": 16169, + "teacher_loss": 0.17831340432167053 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.45772433280944824, + "learning_rate": 1.7533862756141495e-05, + "loss": 0.2702, + "step": 16170, + "teacher_loss": 0.24932971596717834 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.15961036086082458, + "learning_rate": 1.7531624117374818e-05, + "loss": 0.2181, + "step": 16171, + "teacher_loss": 0.22460046410560608 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.5282003283500671, + "learning_rate": 1.7529385420565588e-05, + "loss": 0.2384, + "step": 16172, + "teacher_loss": 0.2061898559331894 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.46457311511039734, + "learning_rate": 1.752714666576513e-05, + "loss": 0.3658, + "step": 16173, + "teacher_loss": 0.3548293113708496 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.48269230127334595, + "learning_rate": 1.7524907853024782e-05, + "loss": 0.3196, + "step": 16174, + "teacher_loss": 0.3015066981315613 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.4050183892250061, + "learning_rate": 1.7522668982395863e-05, + "loss": 0.2519, + "step": 16175, + "teacher_loss": 0.23483791947364807 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.16208238899707794, + "learning_rate": 1.752043005392971e-05, + "loss": 0.1718, + "step": 16176, + "teacher_loss": 0.1729292869567871 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.47579246759414673, + "learning_rate": 1.7518191067677648e-05, + "loss": 0.28, + "step": 16177, + "teacher_loss": 0.2582804560661316 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.33389919996261597, + "learning_rate": 1.7515952023691022e-05, + "loss": 0.2996, + "step": 16178, + "teacher_loss": 0.2958042621612549 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.36439526081085205, + "learning_rate": 1.7513712922021154e-05, + "loss": 0.2229, + "step": 16179, + "teacher_loss": 0.20723237097263336 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.36983248591423035, + "learning_rate": 1.7511473762719382e-05, + "loss": 0.3186, + "step": 16180, + "teacher_loss": 0.3128817677497864 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.30682337284088135, + "learning_rate": 1.7509234545837052e-05, + "loss": 0.1638, + "step": 16181, + "teacher_loss": 0.1478997766971588 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.3992833197116852, + "learning_rate": 1.7506995271425494e-05, + "loss": 0.1854, + "step": 16182, + "teacher_loss": 0.16160094738006592 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.4172820746898651, + "learning_rate": 1.7504755939536056e-05, + "loss": 0.2763, + "step": 16183, + "teacher_loss": 0.2606709897518158 + }, + { + "compression_loss": 0.0, + "epoch": 2.92, + "label_loss": 0.3798571228981018, + "learning_rate": 1.750251655022007e-05, + "loss": 0.3021, + "step": 16184, + "teacher_loss": 0.2934991121292114 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.46387526392936707, + "learning_rate": 1.7500277103528883e-05, + "loss": 0.1727, + "step": 16185, + "teacher_loss": 0.1403772234916687 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.7701764106750488, + "learning_rate": 1.749803759951384e-05, + "loss": 0.446, + "step": 16186, + "teacher_loss": 0.4099322557449341 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 1.0217154026031494, + "learning_rate": 1.749579803822628e-05, + "loss": 1.0812, + "step": 16187, + "teacher_loss": 1.0877916812896729 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.711355447769165, + "learning_rate": 1.749355841971755e-05, + "loss": 0.2979, + "step": 16188, + "teacher_loss": 0.2519662380218506 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.5177268385887146, + "learning_rate": 1.7491318744039012e-05, + "loss": 0.2221, + "step": 16189, + "teacher_loss": 0.1891995370388031 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.6185563802719116, + "learning_rate": 1.7489079011242002e-05, + "loss": 0.3184, + "step": 16190, + "teacher_loss": 0.2850569486618042 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.5217098593711853, + "learning_rate": 1.748683922137787e-05, + "loss": 0.2274, + "step": 16191, + "teacher_loss": 0.19475026428699493 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.1249813660979271, + "learning_rate": 1.748459937449797e-05, + "loss": 0.1696, + "step": 16192, + "teacher_loss": 0.17457376420497894 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.6198744773864746, + "learning_rate": 1.7482359470653656e-05, + "loss": 0.325, + "step": 16193, + "teacher_loss": 0.2922342121601105 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 1.0326619148254395, + "learning_rate": 1.7480119509896285e-05, + "loss": 0.4159, + "step": 16194, + "teacher_loss": 0.3473173975944519 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.5046898126602173, + "learning_rate": 1.747787949227721e-05, + "loss": 0.2326, + "step": 16195, + "teacher_loss": 0.2023729830980301 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.21299424767494202, + "learning_rate": 1.7475639417847783e-05, + "loss": 0.1895, + "step": 16196, + "teacher_loss": 0.18686988949775696 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.2575538754463196, + "learning_rate": 1.7473399286659366e-05, + "loss": 0.1551, + "step": 16197, + "teacher_loss": 0.14368784427642822 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.9524407982826233, + "learning_rate": 1.747115909876332e-05, + "loss": 0.2605, + "step": 16198, + "teacher_loss": 0.18356439471244812 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.25883394479751587, + "learning_rate": 1.746891885421101e-05, + "loss": 0.2105, + "step": 16199, + "teacher_loss": 0.20517179369926453 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.5914641618728638, + "learning_rate": 1.7466678553053788e-05, + "loss": 0.4274, + "step": 16200, + "teacher_loss": 0.40914636850357056 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.33066707849502563, + "learning_rate": 1.7464438195343025e-05, + "loss": 0.254, + "step": 16201, + "teacher_loss": 0.24547211825847626 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.38130831718444824, + "learning_rate": 1.746219778113008e-05, + "loss": 0.2571, + "step": 16202, + "teacher_loss": 0.24333316087722778 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.34671711921691895, + "learning_rate": 1.7459957310466325e-05, + "loss": 0.1723, + "step": 16203, + "teacher_loss": 0.15288150310516357 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.41479361057281494, + "learning_rate": 1.7457716783403122e-05, + "loss": 0.2055, + "step": 16204, + "teacher_loss": 0.18227511644363403 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 1.1521586179733276, + "learning_rate": 1.7455476199991838e-05, + "loss": 0.5271, + "step": 16205, + "teacher_loss": 0.45759499073028564 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.39160391688346863, + "learning_rate": 1.7453235560283855e-05, + "loss": 0.2155, + "step": 16206, + "teacher_loss": 0.19596587121486664 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.6835144758224487, + "learning_rate": 1.7450994864330532e-05, + "loss": 0.2674, + "step": 16207, + "teacher_loss": 0.22113800048828125 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.5899465680122375, + "learning_rate": 1.744875411218324e-05, + "loss": 0.2991, + "step": 16208, + "teacher_loss": 0.26673072576522827 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.24086038768291473, + "learning_rate": 1.7446513303893367e-05, + "loss": 0.1614, + "step": 16209, + "teacher_loss": 0.15262514352798462 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.4065621793270111, + "learning_rate": 1.7444272439512272e-05, + "loss": 0.2729, + "step": 16210, + "teacher_loss": 0.25803348422050476 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.4345285892486572, + "learning_rate": 1.7442031519091343e-05, + "loss": 0.2669, + "step": 16211, + "teacher_loss": 0.24826453626155853 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.3786526322364807, + "learning_rate": 1.743979054268195e-05, + "loss": 0.3106, + "step": 16212, + "teacher_loss": 0.3030116558074951 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.3400603234767914, + "learning_rate": 1.7437549510335475e-05, + "loss": 0.262, + "step": 16213, + "teacher_loss": 0.25330159068107605 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.3885974884033203, + "learning_rate": 1.7435308422103298e-05, + "loss": 0.2524, + "step": 16214, + "teacher_loss": 0.2372906506061554 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.3973779082298279, + "learning_rate": 1.74330672780368e-05, + "loss": 0.3326, + "step": 16215, + "teacher_loss": 0.3254256248474121 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.38280054926872253, + "learning_rate": 1.743082607818737e-05, + "loss": 0.1683, + "step": 16216, + "teacher_loss": 0.14444591104984283 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 1.3200329542160034, + "learning_rate": 1.7428584822606378e-05, + "loss": 0.4854, + "step": 16217, + "teacher_loss": 0.39266103506088257 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.2061062455177307, + "learning_rate": 1.7426343511345215e-05, + "loss": 0.1861, + "step": 16218, + "teacher_loss": 0.18386892974376678 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.8778562545776367, + "learning_rate": 1.7424102144455276e-05, + "loss": 0.5423, + "step": 16219, + "teacher_loss": 0.5050198435783386 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.4748685956001282, + "learning_rate": 1.7421860721987944e-05, + "loss": 0.2362, + "step": 16220, + "teacher_loss": 0.209650918841362 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.41377168893814087, + "learning_rate": 1.7419619243994607e-05, + "loss": 0.1578, + "step": 16221, + "teacher_loss": 0.12932580709457397 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.3093533515930176, + "learning_rate": 1.7417377710526646e-05, + "loss": 0.1763, + "step": 16222, + "teacher_loss": 0.16154059767723083 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.6148918271064758, + "learning_rate": 1.741513612163547e-05, + "loss": 0.3033, + "step": 16223, + "teacher_loss": 0.2687157988548279 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.344913125038147, + "learning_rate": 1.7412894477372462e-05, + "loss": 0.2304, + "step": 16224, + "teacher_loss": 0.21768099069595337 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.25588029623031616, + "learning_rate": 1.7410652777789018e-05, + "loss": 0.1551, + "step": 16225, + "teacher_loss": 0.1438492387533188 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.4032304286956787, + "learning_rate": 1.7408411022936535e-05, + "loss": 0.3067, + "step": 16226, + "teacher_loss": 0.296006977558136 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.4316675662994385, + "learning_rate": 1.7406169212866405e-05, + "loss": 0.2109, + "step": 16227, + "teacher_loss": 0.1864151656627655 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.24068279564380646, + "learning_rate": 1.7403927347630028e-05, + "loss": 0.1561, + "step": 16228, + "teacher_loss": 0.14674007892608643 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.7074235677719116, + "learning_rate": 1.740168542727881e-05, + "loss": 0.3919, + "step": 16229, + "teacher_loss": 0.3567984104156494 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.6305174231529236, + "learning_rate": 1.7399443451864138e-05, + "loss": 0.2308, + "step": 16230, + "teacher_loss": 0.18639595806598663 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.6713123321533203, + "learning_rate": 1.739720142143743e-05, + "loss": 0.3527, + "step": 16231, + "teacher_loss": 0.31731051206588745 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.69814532995224, + "learning_rate": 1.7394959336050076e-05, + "loss": 0.333, + "step": 16232, + "teacher_loss": 0.2923782169818878 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.5767325162887573, + "learning_rate": 1.7392717195753482e-05, + "loss": 0.299, + "step": 16233, + "teacher_loss": 0.2681657671928406 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.49995118379592896, + "learning_rate": 1.7390475000599063e-05, + "loss": 0.25, + "step": 16234, + "teacher_loss": 0.22225210070610046 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.3751608729362488, + "learning_rate": 1.7388232750638214e-05, + "loss": 0.2718, + "step": 16235, + "teacher_loss": 0.2603573203086853 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.644255518913269, + "learning_rate": 1.7385990445922347e-05, + "loss": 0.5252, + "step": 16236, + "teacher_loss": 0.5119647979736328 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.32858479022979736, + "learning_rate": 1.738374808650288e-05, + "loss": 0.1897, + "step": 16237, + "teacher_loss": 0.17430666089057922 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.4562036395072937, + "learning_rate": 1.738150567243121e-05, + "loss": 0.2564, + "step": 16238, + "teacher_loss": 0.2341517210006714 + }, + { + "compression_loss": 0.0, + "epoch": 2.93, + "label_loss": 0.25114375352859497, + "learning_rate": 1.737926320375876e-05, + "loss": 0.1806, + "step": 16239, + "teacher_loss": 0.1727811098098755 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.378387451171875, + "learning_rate": 1.737702068053693e-05, + "loss": 0.2352, + "step": 16240, + "teacher_loss": 0.21928784251213074 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.43779659271240234, + "learning_rate": 1.737477810281715e-05, + "loss": 0.3617, + "step": 16241, + "teacher_loss": 0.3532874584197998 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.4488775134086609, + "learning_rate": 1.7372535470650827e-05, + "loss": 0.3713, + "step": 16242, + "teacher_loss": 0.36263734102249146 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.3894228935241699, + "learning_rate": 1.7370292784089377e-05, + "loss": 0.2033, + "step": 16243, + "teacher_loss": 0.1826312392950058 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.4521743655204773, + "learning_rate": 1.736805004318422e-05, + "loss": 0.2173, + "step": 16244, + "teacher_loss": 0.1911720335483551 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.5785133838653564, + "learning_rate": 1.7365807247986776e-05, + "loss": 0.3274, + "step": 16245, + "teacher_loss": 0.2995510697364807 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.5688097476959229, + "learning_rate": 1.7363564398548466e-05, + "loss": 0.2708, + "step": 16246, + "teacher_loss": 0.23773327469825745 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.3906450569629669, + "learning_rate": 1.736132149492071e-05, + "loss": 0.2493, + "step": 16247, + "teacher_loss": 0.23362880945205688 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.5007302761077881, + "learning_rate": 1.735907853715493e-05, + "loss": 0.2363, + "step": 16248, + "teacher_loss": 0.20691628754138947 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.4081491529941559, + "learning_rate": 1.735683552530255e-05, + "loss": 0.1904, + "step": 16249, + "teacher_loss": 0.1661631464958191 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.861845850944519, + "learning_rate": 1.7354592459415003e-05, + "loss": 0.2617, + "step": 16250, + "teacher_loss": 0.19499847292900085 + }, + { + "epoch": 2.94, + "eval_exact_match": 79.47965941343425, + "eval_f1": 87.22782178210471, + "step": 16250 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.8419839143753052, + "learning_rate": 1.735234933954371e-05, + "loss": 0.3644, + "step": 16251, + "teacher_loss": 0.3113023638725281 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.3901873826980591, + "learning_rate": 1.7350106165740097e-05, + "loss": 0.2734, + "step": 16252, + "teacher_loss": 0.26039940118789673 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.38331127166748047, + "learning_rate": 1.7347862938055595e-05, + "loss": 0.27, + "step": 16253, + "teacher_loss": 0.2573677897453308 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.5922526121139526, + "learning_rate": 1.734561965654164e-05, + "loss": 0.2926, + "step": 16254, + "teacher_loss": 0.25933918356895447 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.6473124027252197, + "learning_rate": 1.7343376321249654e-05, + "loss": 0.284, + "step": 16255, + "teacher_loss": 0.2436695694923401 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.4468892812728882, + "learning_rate": 1.7341132932231075e-05, + "loss": 0.226, + "step": 16256, + "teacher_loss": 0.20140531659126282 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.876840353012085, + "learning_rate": 1.7338889489537336e-05, + "loss": 0.4509, + "step": 16257, + "teacher_loss": 0.4036021828651428 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.7752852439880371, + "learning_rate": 1.7336645993219876e-05, + "loss": 0.3929, + "step": 16258, + "teacher_loss": 0.3503738045692444 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.25751954317092896, + "learning_rate": 1.7334402443330126e-05, + "loss": 0.2534, + "step": 16259, + "teacher_loss": 0.25288885831832886 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.1455126702785492, + "learning_rate": 1.733215883991953e-05, + "loss": 0.1277, + "step": 16260, + "teacher_loss": 0.12575474381446838 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.42787203192710876, + "learning_rate": 1.7329915183039523e-05, + "loss": 0.203, + "step": 16261, + "teacher_loss": 0.17806656658649445 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.3982592821121216, + "learning_rate": 1.7327671472741547e-05, + "loss": 0.2261, + "step": 16262, + "teacher_loss": 0.20698818564414978 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.3402353525161743, + "learning_rate": 1.732542770907704e-05, + "loss": 0.2395, + "step": 16263, + "teacher_loss": 0.22830656170845032 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.5215482711791992, + "learning_rate": 1.7323183892097455e-05, + "loss": 0.2055, + "step": 16264, + "teacher_loss": 0.1703799068927765 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.87959885597229, + "learning_rate": 1.7320940021854217e-05, + "loss": 0.3469, + "step": 16265, + "teacher_loss": 0.2877114415168762 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.34240660071372986, + "learning_rate": 1.731869609839879e-05, + "loss": 0.2189, + "step": 16266, + "teacher_loss": 0.20512527227401733 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.39977091550827026, + "learning_rate": 1.7316452121782614e-05, + "loss": 0.2623, + "step": 16267, + "teacher_loss": 0.24700471758842468 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.23541511595249176, + "learning_rate": 1.7314208092057133e-05, + "loss": 0.1834, + "step": 16268, + "teacher_loss": 0.17763873934745789 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.6094462871551514, + "learning_rate": 1.7311964009273802e-05, + "loss": 0.2581, + "step": 16269, + "teacher_loss": 0.21907475590705872 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.3922407031059265, + "learning_rate": 1.7309719873484065e-05, + "loss": 0.2978, + "step": 16270, + "teacher_loss": 0.287272572517395 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.3889186382293701, + "learning_rate": 1.730747568473938e-05, + "loss": 0.2678, + "step": 16271, + "teacher_loss": 0.25430089235305786 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.7005231380462646, + "learning_rate": 1.730523144309119e-05, + "loss": 0.2403, + "step": 16272, + "teacher_loss": 0.189142107963562 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.6324283480644226, + "learning_rate": 1.7302987148590956e-05, + "loss": 0.3775, + "step": 16273, + "teacher_loss": 0.34919309616088867 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.06284971535205841, + "learning_rate": 1.7300742801290132e-05, + "loss": 0.1635, + "step": 16274, + "teacher_loss": 0.17464952170848846 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.5814729928970337, + "learning_rate": 1.729849840124018e-05, + "loss": 0.3112, + "step": 16275, + "teacher_loss": 0.28117141127586365 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.5725794434547424, + "learning_rate": 1.7296253948492546e-05, + "loss": 0.4421, + "step": 16276, + "teacher_loss": 0.42760375142097473 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.2967584729194641, + "learning_rate": 1.729400944309869e-05, + "loss": 0.2443, + "step": 16277, + "teacher_loss": 0.23845678567886353 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.31133121252059937, + "learning_rate": 1.7291764885110078e-05, + "loss": 0.2415, + "step": 16278, + "teacher_loss": 0.23370006680488586 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.5670042634010315, + "learning_rate": 1.7289520274578167e-05, + "loss": 0.2549, + "step": 16279, + "teacher_loss": 0.22019356489181519 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.36124199628829956, + "learning_rate": 1.7287275611554425e-05, + "loss": 0.2509, + "step": 16280, + "teacher_loss": 0.23863258957862854 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.5369096994400024, + "learning_rate": 1.7285030896090307e-05, + "loss": 0.2824, + "step": 16281, + "teacher_loss": 0.25416868925094604 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.4974510669708252, + "learning_rate": 1.728278612823728e-05, + "loss": 0.3457, + "step": 16282, + "teacher_loss": 0.32883739471435547 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.4022822678089142, + "learning_rate": 1.728054130804681e-05, + "loss": 0.2584, + "step": 16283, + "teacher_loss": 0.24246428906917572 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.508563756942749, + "learning_rate": 1.7278296435570372e-05, + "loss": 0.2159, + "step": 16284, + "teacher_loss": 0.18336786329746246 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.6856218576431274, + "learning_rate": 1.727605151085942e-05, + "loss": 0.2553, + "step": 16285, + "teacher_loss": 0.2075391411781311 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.372144877910614, + "learning_rate": 1.7273806533965437e-05, + "loss": 0.227, + "step": 16286, + "teacher_loss": 0.21084558963775635 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.4844840168952942, + "learning_rate": 1.7271561504939886e-05, + "loss": 0.5519, + "step": 16287, + "teacher_loss": 0.5594152212142944 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.49252432584762573, + "learning_rate": 1.7269316423834238e-05, + "loss": 0.3128, + "step": 16288, + "teacher_loss": 0.292876660823822 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.2281474471092224, + "learning_rate": 1.7267071290699973e-05, + "loss": 0.196, + "step": 16289, + "teacher_loss": 0.1924777626991272 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.16755689680576324, + "learning_rate": 1.7264826105588554e-05, + "loss": 0.1538, + "step": 16290, + "teacher_loss": 0.15230529010295868 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.5102900266647339, + "learning_rate": 1.7262580868551465e-05, + "loss": 0.2026, + "step": 16291, + "teacher_loss": 0.16836917400360107 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.49474066495895386, + "learning_rate": 1.726033557964018e-05, + "loss": 0.355, + "step": 16292, + "teacher_loss": 0.3395208716392517 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.3448638916015625, + "learning_rate": 1.7258090238906177e-05, + "loss": 0.2354, + "step": 16293, + "teacher_loss": 0.22324399650096893 + }, + { + "compression_loss": 0.0, + "epoch": 2.94, + "label_loss": 0.5268954038619995, + "learning_rate": 1.725584484640094e-05, + "loss": 0.2394, + "step": 16294, + "teacher_loss": 0.20747356116771698 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.42755186557769775, + "learning_rate": 1.725359940217594e-05, + "loss": 0.2955, + "step": 16295, + "teacher_loss": 0.2808440923690796 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.6451007127761841, + "learning_rate": 1.7251353906282658e-05, + "loss": 0.2849, + "step": 16296, + "teacher_loss": 0.24487422406673431 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.3630632758140564, + "learning_rate": 1.7249108358772588e-05, + "loss": 0.2389, + "step": 16297, + "teacher_loss": 0.22506776452064514 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.3847913146018982, + "learning_rate": 1.72468627596972e-05, + "loss": 0.3577, + "step": 16298, + "teacher_loss": 0.3546708822250366 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.3812359571456909, + "learning_rate": 1.7244617109107995e-05, + "loss": 0.2024, + "step": 16299, + "teacher_loss": 0.18252722918987274 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.9431231021881104, + "learning_rate": 1.7242371407056438e-05, + "loss": 0.2437, + "step": 16300, + "teacher_loss": 0.16598841547966003 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.27630993723869324, + "learning_rate": 1.7240125653594034e-05, + "loss": 0.2831, + "step": 16301, + "teacher_loss": 0.28381648659706116 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.386505126953125, + "learning_rate": 1.7237879848772267e-05, + "loss": 0.1504, + "step": 16302, + "teacher_loss": 0.1241685301065445 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.346378892660141, + "learning_rate": 1.7235633992642615e-05, + "loss": 0.1937, + "step": 16303, + "teacher_loss": 0.1767573356628418 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.3373514413833618, + "learning_rate": 1.7233388085256587e-05, + "loss": 0.2207, + "step": 16304, + "teacher_loss": 0.2077610194683075 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.5133582949638367, + "learning_rate": 1.7231142126665666e-05, + "loss": 0.3009, + "step": 16305, + "teacher_loss": 0.27734804153442383 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.3472082018852234, + "learning_rate": 1.722889611692134e-05, + "loss": 0.2696, + "step": 16306, + "teacher_loss": 0.260980486869812 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 1.1247886419296265, + "learning_rate": 1.7226650056075118e-05, + "loss": 0.6099, + "step": 16307, + "teacher_loss": 0.552704393863678 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.42673367261886597, + "learning_rate": 1.7224403944178474e-05, + "loss": 0.2908, + "step": 16308, + "teacher_loss": 0.27568483352661133 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.5553944706916809, + "learning_rate": 1.7222157781282924e-05, + "loss": 0.2537, + "step": 16309, + "teacher_loss": 0.2202032506465912 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.5381714105606079, + "learning_rate": 1.721991156743996e-05, + "loss": 0.2726, + "step": 16310, + "teacher_loss": 0.24308007955551147 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.390067458152771, + "learning_rate": 1.7217665302701075e-05, + "loss": 0.3115, + "step": 16311, + "teacher_loss": 0.30278295278549194 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.9225806593894958, + "learning_rate": 1.7215418987117774e-05, + "loss": 0.3549, + "step": 16312, + "teacher_loss": 0.29182058572769165 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.6240533590316772, + "learning_rate": 1.7213172620741556e-05, + "loss": 0.253, + "step": 16313, + "teacher_loss": 0.21180428564548492 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.4970123767852783, + "learning_rate": 1.721092620362393e-05, + "loss": 0.2994, + "step": 16314, + "teacher_loss": 0.277407169342041 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.45748141407966614, + "learning_rate": 1.720867973581639e-05, + "loss": 0.2321, + "step": 16315, + "teacher_loss": 0.20705494284629822 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.38404062390327454, + "learning_rate": 1.7206433217370452e-05, + "loss": 0.196, + "step": 16316, + "teacher_loss": 0.17506375908851624 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.4473424553871155, + "learning_rate": 1.7204186648337614e-05, + "loss": 0.2612, + "step": 16317, + "teacher_loss": 0.2405557632446289 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.975817084312439, + "learning_rate": 1.7201940028769384e-05, + "loss": 0.3909, + "step": 16318, + "teacher_loss": 0.32595011591911316 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.286388635635376, + "learning_rate": 1.719969335871727e-05, + "loss": 0.328, + "step": 16319, + "teacher_loss": 0.33257192373275757 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.2465459406375885, + "learning_rate": 1.7197446638232778e-05, + "loss": 0.3276, + "step": 16320, + "teacher_loss": 0.3366280794143677 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.8590790033340454, + "learning_rate": 1.7195199867367428e-05, + "loss": 0.3324, + "step": 16321, + "teacher_loss": 0.273929625749588 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.2645612955093384, + "learning_rate": 1.7192953046172726e-05, + "loss": 0.1921, + "step": 16322, + "teacher_loss": 0.18405257165431976 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.22946320474147797, + "learning_rate": 1.719070617470018e-05, + "loss": 0.1404, + "step": 16323, + "teacher_loss": 0.13054664433002472 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.5966145992279053, + "learning_rate": 1.7188459253001322e-05, + "loss": 0.3846, + "step": 16324, + "teacher_loss": 0.36104580760002136 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.5277209877967834, + "learning_rate": 1.718621228112764e-05, + "loss": 0.2809, + "step": 16325, + "teacher_loss": 0.25347211956977844 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.4121139645576477, + "learning_rate": 1.7183965259130676e-05, + "loss": 0.2116, + "step": 16326, + "teacher_loss": 0.18931615352630615 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.441952645778656, + "learning_rate": 1.7181718187061932e-05, + "loss": 0.2549, + "step": 16327, + "teacher_loss": 0.23408719897270203 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.3557584285736084, + "learning_rate": 1.717947106497293e-05, + "loss": 0.2776, + "step": 16328, + "teacher_loss": 0.26895472407341003 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.35478830337524414, + "learning_rate": 1.7177223892915196e-05, + "loss": 0.2408, + "step": 16329, + "teacher_loss": 0.22809430956840515 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.6504161953926086, + "learning_rate": 1.717497667094024e-05, + "loss": 0.3005, + "step": 16330, + "teacher_loss": 0.2616676688194275 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.4984099268913269, + "learning_rate": 1.7172729399099592e-05, + "loss": 0.3456, + "step": 16331, + "teacher_loss": 0.3286668062210083 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.8170156478881836, + "learning_rate": 1.7170482077444773e-05, + "loss": 0.5282, + "step": 16332, + "teacher_loss": 0.4961158335208893 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.35584887862205505, + "learning_rate": 1.7168234706027304e-05, + "loss": 0.2352, + "step": 16333, + "teacher_loss": 0.2218213677406311 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.428577184677124, + "learning_rate": 1.716598728489872e-05, + "loss": 0.1892, + "step": 16334, + "teacher_loss": 0.1625823676586151 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.4782981276512146, + "learning_rate": 1.7163739814110538e-05, + "loss": 0.2786, + "step": 16335, + "teacher_loss": 0.25641757249832153 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.8452093005180359, + "learning_rate": 1.7161492293714288e-05, + "loss": 0.3128, + "step": 16336, + "teacher_loss": 0.2536107897758484 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.37375450134277344, + "learning_rate": 1.7159244723761505e-05, + "loss": 0.294, + "step": 16337, + "teacher_loss": 0.2851158380508423 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.45043981075286865, + "learning_rate": 1.7156997104303712e-05, + "loss": 0.2547, + "step": 16338, + "teacher_loss": 0.23295922577381134 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.7535474300384521, + "learning_rate": 1.7154749435392442e-05, + "loss": 0.32, + "step": 16339, + "teacher_loss": 0.27182990312576294 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.664901614189148, + "learning_rate": 1.715250171707923e-05, + "loss": 0.3492, + "step": 16340, + "teacher_loss": 0.3141516447067261 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.3617512285709381, + "learning_rate": 1.7150253949415604e-05, + "loss": 0.2934, + "step": 16341, + "teacher_loss": 0.2858373522758484 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.8883835673332214, + "learning_rate": 1.7148006132453102e-05, + "loss": 0.3001, + "step": 16342, + "teacher_loss": 0.23472237586975098 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.9331611394882202, + "learning_rate": 1.7145758266243258e-05, + "loss": 0.3861, + "step": 16343, + "teacher_loss": 0.32535603642463684 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.22941254079341888, + "learning_rate": 1.7143510350837615e-05, + "loss": 0.2551, + "step": 16344, + "teacher_loss": 0.25792190432548523 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.6563414335250854, + "learning_rate": 1.7141262386287703e-05, + "loss": 0.2796, + "step": 16345, + "teacher_loss": 0.23777785897254944 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.3304232954978943, + "learning_rate": 1.7139014372645065e-05, + "loss": 0.2058, + "step": 16346, + "teacher_loss": 0.1920068860054016 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.7204067707061768, + "learning_rate": 1.7136766309961243e-05, + "loss": 0.2529, + "step": 16347, + "teacher_loss": 0.20091986656188965 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.20554420351982117, + "learning_rate": 1.713451819828777e-05, + "loss": 0.1697, + "step": 16348, + "teacher_loss": 0.1657392382621765 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.39084890484809875, + "learning_rate": 1.7132270037676198e-05, + "loss": 0.2001, + "step": 16349, + "teacher_loss": 0.1789606213569641 + }, + { + "compression_loss": 0.0, + "epoch": 2.95, + "label_loss": 0.1893659383058548, + "learning_rate": 1.7130021828178066e-05, + "loss": 0.225, + "step": 16350, + "teacher_loss": 0.2289954572916031 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.46683138608932495, + "learning_rate": 1.712777356984492e-05, + "loss": 0.3104, + "step": 16351, + "teacher_loss": 0.29303181171417236 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.4296014904975891, + "learning_rate": 1.7125525262728308e-05, + "loss": 0.2729, + "step": 16352, + "teacher_loss": 0.2554601728916168 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.3253156542778015, + "learning_rate": 1.712327690687977e-05, + "loss": 0.278, + "step": 16353, + "teacher_loss": 0.27270281314849854 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.16371138393878937, + "learning_rate": 1.7121028502350864e-05, + "loss": 0.1761, + "step": 16354, + "teacher_loss": 0.17745622992515564 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.18056559562683105, + "learning_rate": 1.711878004919313e-05, + "loss": 0.2001, + "step": 16355, + "teacher_loss": 0.2022608071565628 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.1301613748073578, + "learning_rate": 1.7116531547458115e-05, + "loss": 0.1799, + "step": 16356, + "teacher_loss": 0.1853765845298767 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.6129274964332581, + "learning_rate": 1.711428299719739e-05, + "loss": 0.2377, + "step": 16357, + "teacher_loss": 0.19595646858215332 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.5263664126396179, + "learning_rate": 1.7112034398462487e-05, + "loss": 0.2359, + "step": 16358, + "teacher_loss": 0.20363107323646545 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.3070058822631836, + "learning_rate": 1.710978575130497e-05, + "loss": 0.2515, + "step": 16359, + "teacher_loss": 0.2453344464302063 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.4108779728412628, + "learning_rate": 1.710753705577639e-05, + "loss": 0.3026, + "step": 16360, + "teacher_loss": 0.2906193733215332 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.43675220012664795, + "learning_rate": 1.7105288311928303e-05, + "loss": 0.3134, + "step": 16361, + "teacher_loss": 0.2997252345085144 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.12622493505477905, + "learning_rate": 1.7103039519812274e-05, + "loss": 0.1655, + "step": 16362, + "teacher_loss": 0.1698666512966156 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.6012797355651855, + "learning_rate": 1.7100790679479845e-05, + "loss": 0.4318, + "step": 16363, + "teacher_loss": 0.41300714015960693 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.3440447449684143, + "learning_rate": 1.709854179098259e-05, + "loss": 0.3186, + "step": 16364, + "teacher_loss": 0.31572195887565613 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.5214061737060547, + "learning_rate": 1.7096292854372063e-05, + "loss": 0.4038, + "step": 16365, + "teacher_loss": 0.39077338576316833 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.37245213985443115, + "learning_rate": 1.7094043869699824e-05, + "loss": 0.2563, + "step": 16366, + "teacher_loss": 0.24339744448661804 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.5611665844917297, + "learning_rate": 1.7091794837017438e-05, + "loss": 0.2725, + "step": 16367, + "teacher_loss": 0.24038168787956238 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.5801632404327393, + "learning_rate": 1.7089545756376467e-05, + "loss": 0.2354, + "step": 16368, + "teacher_loss": 0.19712863862514496 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.5452362298965454, + "learning_rate": 1.7087296627828478e-05, + "loss": 0.2517, + "step": 16369, + "teacher_loss": 0.21910472214221954 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.2074383795261383, + "learning_rate": 1.7085047451425035e-05, + "loss": 0.1556, + "step": 16370, + "teacher_loss": 0.14980915188789368 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 1.001579999923706, + "learning_rate": 1.7082798227217702e-05, + "loss": 0.5016, + "step": 16371, + "teacher_loss": 0.4460577070713043 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.39290955662727356, + "learning_rate": 1.7080548955258054e-05, + "loss": 0.1881, + "step": 16372, + "teacher_loss": 0.16534891724586487 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.5870050191879272, + "learning_rate": 1.707829963559766e-05, + "loss": 0.2937, + "step": 16373, + "teacher_loss": 0.2610911726951599 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.07918013632297516, + "learning_rate": 1.707605026828808e-05, + "loss": 0.2388, + "step": 16374, + "teacher_loss": 0.2565152645111084 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.47072678804397583, + "learning_rate": 1.707380085338089e-05, + "loss": 0.3791, + "step": 16375, + "teacher_loss": 0.36890822649002075 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.4134020209312439, + "learning_rate": 1.7071551390927667e-05, + "loss": 0.2552, + "step": 16376, + "teacher_loss": 0.23756662011146545 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.34473299980163574, + "learning_rate": 1.7069301880979983e-05, + "loss": 0.2172, + "step": 16377, + "teacher_loss": 0.20299117267131805 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.4672027826309204, + "learning_rate": 1.7067052323589408e-05, + "loss": 0.3335, + "step": 16378, + "teacher_loss": 0.3186095058917999 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.24797323346138, + "learning_rate": 1.7064802718807526e-05, + "loss": 0.2267, + "step": 16379, + "teacher_loss": 0.22430217266082764 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.22770924866199493, + "learning_rate": 1.70625530666859e-05, + "loss": 0.2809, + "step": 16380, + "teacher_loss": 0.2868365943431854 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.5744701623916626, + "learning_rate": 1.7060303367276123e-05, + "loss": 0.2904, + "step": 16381, + "teacher_loss": 0.25887537002563477 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.4150558114051819, + "learning_rate": 1.7058053620629768e-05, + "loss": 0.2386, + "step": 16382, + "teacher_loss": 0.21898195147514343 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.44860827922821045, + "learning_rate": 1.7055803826798406e-05, + "loss": 0.318, + "step": 16383, + "teacher_loss": 0.30348485708236694 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.18151721358299255, + "learning_rate": 1.705355398583363e-05, + "loss": 0.1874, + "step": 16384, + "teacher_loss": 0.18804213404655457 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.8944460153579712, + "learning_rate": 1.7051304097787018e-05, + "loss": 0.4166, + "step": 16385, + "teacher_loss": 0.36347895860671997 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.28600141406059265, + "learning_rate": 1.7049054162710154e-05, + "loss": 0.2072, + "step": 16386, + "teacher_loss": 0.19845086336135864 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.7274162173271179, + "learning_rate": 1.7046804180654623e-05, + "loss": 0.5982, + "step": 16387, + "teacher_loss": 0.5837960243225098 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.5957872271537781, + "learning_rate": 1.7044554151672003e-05, + "loss": 0.2966, + "step": 16388, + "teacher_loss": 0.2633640468120575 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.37750643491744995, + "learning_rate": 1.7042304075813893e-05, + "loss": 0.3656, + "step": 16389, + "teacher_loss": 0.3642995357513428 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.24716952443122864, + "learning_rate": 1.7040053953131872e-05, + "loss": 0.1606, + "step": 16390, + "teacher_loss": 0.15096841752529144 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.7059513330459595, + "learning_rate": 1.7037803783677525e-05, + "loss": 0.387, + "step": 16391, + "teacher_loss": 0.35151395201683044 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.5893921852111816, + "learning_rate": 1.7035553567502457e-05, + "loss": 0.3849, + "step": 16392, + "teacher_loss": 0.3621862530708313 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.5164104700088501, + "learning_rate": 1.703330330465824e-05, + "loss": 0.2122, + "step": 16393, + "teacher_loss": 0.1783895641565323 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.5739178657531738, + "learning_rate": 1.7031052995196475e-05, + "loss": 0.2414, + "step": 16394, + "teacher_loss": 0.20444750785827637 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.38800162076950073, + "learning_rate": 1.702880263916876e-05, + "loss": 0.281, + "step": 16395, + "teacher_loss": 0.2691548764705658 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.5381648540496826, + "learning_rate": 1.7026552236626676e-05, + "loss": 0.2259, + "step": 16396, + "teacher_loss": 0.1911858469247818 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.40319883823394775, + "learning_rate": 1.7024301787621828e-05, + "loss": 0.285, + "step": 16397, + "teacher_loss": 0.2718772888183594 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.4576597809791565, + "learning_rate": 1.702205129220581e-05, + "loss": 0.2842, + "step": 16398, + "teacher_loss": 0.2648809850215912 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.34787678718566895, + "learning_rate": 1.7019800750430218e-05, + "loss": 0.2234, + "step": 16399, + "teacher_loss": 0.20955616235733032 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.5116361379623413, + "learning_rate": 1.701755016234665e-05, + "loss": 0.3149, + "step": 16400, + "teacher_loss": 0.2930814027786255 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.6844651699066162, + "learning_rate": 1.7015299528006702e-05, + "loss": 0.3903, + "step": 16401, + "teacher_loss": 0.35759711265563965 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.2190747857093811, + "learning_rate": 1.7013048847461982e-05, + "loss": 0.232, + "step": 16402, + "teacher_loss": 0.23348617553710938 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.3803383708000183, + "learning_rate": 1.701079812076409e-05, + "loss": 0.2916, + "step": 16403, + "teacher_loss": 0.2816971242427826 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.6304861307144165, + "learning_rate": 1.7008547347964625e-05, + "loss": 0.4758, + "step": 16404, + "teacher_loss": 0.4586649537086487 + }, + { + "compression_loss": 0.0, + "epoch": 2.96, + "label_loss": 0.6817525625228882, + "learning_rate": 1.7006296529115186e-05, + "loss": 0.3006, + "step": 16405, + "teacher_loss": 0.25820252299308777 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.5929562449455261, + "learning_rate": 1.7004045664267386e-05, + "loss": 0.3355, + "step": 16406, + "teacher_loss": 0.3068896234035492 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.37227070331573486, + "learning_rate": 1.700179475347283e-05, + "loss": 0.3724, + "step": 16407, + "teacher_loss": 0.3724183440208435 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.6756417751312256, + "learning_rate": 1.6999543796783117e-05, + "loss": 0.2342, + "step": 16408, + "teacher_loss": 0.18512097001075745 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.27274563908576965, + "learning_rate": 1.699729279424986e-05, + "loss": 0.1751, + "step": 16409, + "teacher_loss": 0.1642094850540161 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.30976438522338867, + "learning_rate": 1.6995041745924672e-05, + "loss": 0.2722, + "step": 16410, + "teacher_loss": 0.26806509494781494 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.4951731562614441, + "learning_rate": 1.6992790651859155e-05, + "loss": 0.2253, + "step": 16411, + "teacher_loss": 0.1953664869070053 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.5574835538864136, + "learning_rate": 1.6990539512104925e-05, + "loss": 0.2659, + "step": 16412, + "teacher_loss": 0.23350423574447632 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.2652541995048523, + "learning_rate": 1.698828832671359e-05, + "loss": 0.1691, + "step": 16413, + "teacher_loss": 0.15836912393569946 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.6703963875770569, + "learning_rate": 1.6986037095736764e-05, + "loss": 0.3054, + "step": 16414, + "teacher_loss": 0.26487770676612854 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.4167998433113098, + "learning_rate": 1.6983785819226066e-05, + "loss": 0.1525, + "step": 16415, + "teacher_loss": 0.12311267852783203 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.28624194860458374, + "learning_rate": 1.6981534497233104e-05, + "loss": 0.2461, + "step": 16416, + "teacher_loss": 0.24166792631149292 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.4696184992790222, + "learning_rate": 1.69792831298095e-05, + "loss": 0.2499, + "step": 16417, + "teacher_loss": 0.2255113273859024 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.4593292474746704, + "learning_rate": 1.697703171700686e-05, + "loss": 0.3819, + "step": 16418, + "teacher_loss": 0.37332725524902344 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.18037673830986023, + "learning_rate": 1.6974780258876814e-05, + "loss": 0.1976, + "step": 16419, + "teacher_loss": 0.19952178001403809 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.425466388463974, + "learning_rate": 1.6972528755470983e-05, + "loss": 0.3145, + "step": 16420, + "teacher_loss": 0.3021823465824127 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.5457009077072144, + "learning_rate": 1.6970277206840976e-05, + "loss": 0.3867, + "step": 16421, + "teacher_loss": 0.3690539002418518 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.2696635127067566, + "learning_rate": 1.6968025613038424e-05, + "loss": 0.2131, + "step": 16422, + "teacher_loss": 0.20682938396930695 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.45378175377845764, + "learning_rate": 1.696577397411494e-05, + "loss": 0.2452, + "step": 16423, + "teacher_loss": 0.22202830016613007 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.3617762327194214, + "learning_rate": 1.6963522290122155e-05, + "loss": 0.2023, + "step": 16424, + "teacher_loss": 0.1845371574163437 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.5048460960388184, + "learning_rate": 1.696127056111169e-05, + "loss": 0.2368, + "step": 16425, + "teacher_loss": 0.2069961130619049 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.45305269956588745, + "learning_rate": 1.6959018787135172e-05, + "loss": 0.2041, + "step": 16426, + "teacher_loss": 0.17649075388908386 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.40985673666000366, + "learning_rate": 1.6956766968244226e-05, + "loss": 0.3094, + "step": 16427, + "teacher_loss": 0.2982270419597626 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.5064584016799927, + "learning_rate": 1.695451510449048e-05, + "loss": 0.304, + "step": 16428, + "teacher_loss": 0.2815263867378235 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.353840172290802, + "learning_rate": 1.6952263195925567e-05, + "loss": 0.2339, + "step": 16429, + "teacher_loss": 0.2205272614955902 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.3625691533088684, + "learning_rate": 1.69500112426011e-05, + "loss": 0.2205, + "step": 16430, + "teacher_loss": 0.20474043488502502 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.8140820264816284, + "learning_rate": 1.6947759244568734e-05, + "loss": 0.5425, + "step": 16431, + "teacher_loss": 0.512336015701294 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.4460495710372925, + "learning_rate": 1.6945507201880086e-05, + "loss": 0.3836, + "step": 16432, + "teacher_loss": 0.3766152262687683 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.7889648675918579, + "learning_rate": 1.6943255114586788e-05, + "loss": 0.2922, + "step": 16433, + "teacher_loss": 0.23696674406528473 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.25190770626068115, + "learning_rate": 1.6941002982740475e-05, + "loss": 0.1738, + "step": 16434, + "teacher_loss": 0.16512750089168549 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.27945244312286377, + "learning_rate": 1.693875080639279e-05, + "loss": 0.2964, + "step": 16435, + "teacher_loss": 0.2983161211013794 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.33559486269950867, + "learning_rate": 1.6936498585595355e-05, + "loss": 0.2061, + "step": 16436, + "teacher_loss": 0.19168466329574585 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.37142181396484375, + "learning_rate": 1.6934246320399818e-05, + "loss": 0.2319, + "step": 16437, + "teacher_loss": 0.21643032133579254 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.2934309244155884, + "learning_rate": 1.6931994010857804e-05, + "loss": 0.211, + "step": 16438, + "teacher_loss": 0.20188406109809875 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.634645938873291, + "learning_rate": 1.6929741657020964e-05, + "loss": 0.3068, + "step": 16439, + "teacher_loss": 0.2703203558921814 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.358377605676651, + "learning_rate": 1.692748925894094e-05, + "loss": 0.2419, + "step": 16440, + "teacher_loss": 0.22893080115318298 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.7055276036262512, + "learning_rate": 1.6925236816669358e-05, + "loss": 0.316, + "step": 16441, + "teacher_loss": 0.2727735638618469 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.5994584560394287, + "learning_rate": 1.6922984330257875e-05, + "loss": 0.3804, + "step": 16442, + "teacher_loss": 0.35602110624313354 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.4030289649963379, + "learning_rate": 1.692073179975812e-05, + "loss": 0.2555, + "step": 16443, + "teacher_loss": 0.23912355303764343 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.28601977229118347, + "learning_rate": 1.6918479225221744e-05, + "loss": 0.2442, + "step": 16444, + "teacher_loss": 0.23960527777671814 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.3167138695716858, + "learning_rate": 1.6916226606700396e-05, + "loss": 0.2695, + "step": 16445, + "teacher_loss": 0.26420146226882935 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.6018577814102173, + "learning_rate": 1.6913973944245713e-05, + "loss": 0.2388, + "step": 16446, + "teacher_loss": 0.19851410388946533 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 1.1342229843139648, + "learning_rate": 1.691172123790935e-05, + "loss": 0.3924, + "step": 16447, + "teacher_loss": 0.3100038766860962 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.9424023032188416, + "learning_rate": 1.6909468487742947e-05, + "loss": 0.3751, + "step": 16448, + "teacher_loss": 0.3120918571949005 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.44704148173332214, + "learning_rate": 1.6907215693798155e-05, + "loss": 0.1759, + "step": 16449, + "teacher_loss": 0.14577841758728027 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.4822198748588562, + "learning_rate": 1.690496285612663e-05, + "loss": 0.2854, + "step": 16450, + "teacher_loss": 0.2634811997413635 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 1.1144938468933105, + "learning_rate": 1.690270997478001e-05, + "loss": 0.4803, + "step": 16451, + "teacher_loss": 0.40985995531082153 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.2706015706062317, + "learning_rate": 1.6900457049809963e-05, + "loss": 0.1745, + "step": 16452, + "teacher_loss": 0.163822203874588 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.29608991742134094, + "learning_rate": 1.6898204081268134e-05, + "loss": 0.23, + "step": 16453, + "teacher_loss": 0.22270649671554565 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.952675461769104, + "learning_rate": 1.6895951069206175e-05, + "loss": 0.3429, + "step": 16454, + "teacher_loss": 0.2751733958721161 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.31866759061813354, + "learning_rate": 1.6893698013675742e-05, + "loss": 0.2194, + "step": 16455, + "teacher_loss": 0.20841509103775024 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.22175806760787964, + "learning_rate": 1.6891444914728484e-05, + "loss": 0.1823, + "step": 16456, + "teacher_loss": 0.1779344379901886 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.31399017572402954, + "learning_rate": 1.6889191772416073e-05, + "loss": 0.2223, + "step": 16457, + "teacher_loss": 0.2121584266424179 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.35604584217071533, + "learning_rate": 1.6886938586790158e-05, + "loss": 0.2001, + "step": 16458, + "teacher_loss": 0.1828138530254364 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.3081555664539337, + "learning_rate": 1.6884685357902395e-05, + "loss": 0.2668, + "step": 16459, + "teacher_loss": 0.2622055411338806 + }, + { + "compression_loss": 0.0, + "epoch": 2.97, + "label_loss": 0.699011504650116, + "learning_rate": 1.688243208580445e-05, + "loss": 0.438, + "step": 16460, + "teacher_loss": 0.4090268015861511 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.22945944964885712, + "learning_rate": 1.6880178770547984e-05, + "loss": 0.2791, + "step": 16461, + "teacher_loss": 0.28464236855506897 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.563493549823761, + "learning_rate": 1.687792541218465e-05, + "loss": 0.2913, + "step": 16462, + "teacher_loss": 0.2610647976398468 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.8003162145614624, + "learning_rate": 1.6875672010766126e-05, + "loss": 0.2721, + "step": 16463, + "teacher_loss": 0.21337465941905975 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.40353572368621826, + "learning_rate": 1.6873418566344056e-05, + "loss": 0.2317, + "step": 16464, + "teacher_loss": 0.21256610751152039 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.2510928511619568, + "learning_rate": 1.6871165078970118e-05, + "loss": 0.2974, + "step": 16465, + "teacher_loss": 0.3025716543197632 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.35506492853164673, + "learning_rate": 1.6868911548695977e-05, + "loss": 0.2236, + "step": 16466, + "teacher_loss": 0.20897358655929565 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.4220644533634186, + "learning_rate": 1.68666579755733e-05, + "loss": 0.2438, + "step": 16467, + "teacher_loss": 0.22401660680770874 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 1.0676326751708984, + "learning_rate": 1.6864404359653742e-05, + "loss": 0.3961, + "step": 16468, + "teacher_loss": 0.3214343786239624 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.26833629608154297, + "learning_rate": 1.686215070098899e-05, + "loss": 0.1411, + "step": 16469, + "teacher_loss": 0.12698838114738464 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.5252071022987366, + "learning_rate": 1.6859896999630703e-05, + "loss": 0.4438, + "step": 16470, + "teacher_loss": 0.43476295471191406 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.42853832244873047, + "learning_rate": 1.685764325563055e-05, + "loss": 0.2661, + "step": 16471, + "teacher_loss": 0.24807879328727722 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.49477314949035645, + "learning_rate": 1.6855389469040217e-05, + "loss": 0.2616, + "step": 16472, + "teacher_loss": 0.23570013046264648 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.3834612965583801, + "learning_rate": 1.6853135639911357e-05, + "loss": 0.239, + "step": 16473, + "teacher_loss": 0.2229882925748825 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.1757422238588333, + "learning_rate": 1.685088176829566e-05, + "loss": 0.2537, + "step": 16474, + "teacher_loss": 0.2623230814933777 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.7384520769119263, + "learning_rate": 1.684862785424479e-05, + "loss": 0.2589, + "step": 16475, + "teacher_loss": 0.20565253496170044 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.12380479276180267, + "learning_rate": 1.684637389781042e-05, + "loss": 0.1656, + "step": 16476, + "teacher_loss": 0.1702931821346283 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.6220978498458862, + "learning_rate": 1.6844119899044242e-05, + "loss": 0.3464, + "step": 16477, + "teacher_loss": 0.3157762885093689 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.49162235856056213, + "learning_rate": 1.6841865857997917e-05, + "loss": 0.1985, + "step": 16478, + "teacher_loss": 0.16591408848762512 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.35095080733299255, + "learning_rate": 1.683961177472313e-05, + "loss": 0.236, + "step": 16479, + "teacher_loss": 0.22325092554092407 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.26990777254104614, + "learning_rate": 1.6837357649271565e-05, + "loss": 0.268, + "step": 16480, + "teacher_loss": 0.2677558958530426 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.33736637234687805, + "learning_rate": 1.6835103481694893e-05, + "loss": 0.2441, + "step": 16481, + "teacher_loss": 0.2337876260280609 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.38045647740364075, + "learning_rate": 1.6832849272044804e-05, + "loss": 0.2291, + "step": 16482, + "teacher_loss": 0.2123154103755951 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.8675621151924133, + "learning_rate": 1.683059502037298e-05, + "loss": 0.48, + "step": 16483, + "teacher_loss": 0.4369434118270874 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 1.172670602798462, + "learning_rate": 1.682834072673109e-05, + "loss": 0.3942, + "step": 16484, + "teacher_loss": 0.3076777756214142 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.31893390417099, + "learning_rate": 1.682608639117084e-05, + "loss": 0.446, + "step": 16485, + "teacher_loss": 0.4601181745529175 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.2271900773048401, + "learning_rate": 1.6823832013743893e-05, + "loss": 0.1729, + "step": 16486, + "teacher_loss": 0.16684690117835999 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.3577190041542053, + "learning_rate": 1.6821577594501955e-05, + "loss": 0.3096, + "step": 16487, + "teacher_loss": 0.30424898862838745 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.7421973347663879, + "learning_rate": 1.6819323133496702e-05, + "loss": 0.263, + "step": 16488, + "teacher_loss": 0.2097875475883484 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.5481189489364624, + "learning_rate": 1.681706863077982e-05, + "loss": 0.2896, + "step": 16489, + "teacher_loss": 0.260861873626709 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.5275344848632812, + "learning_rate": 1.6814814086403004e-05, + "loss": 0.2338, + "step": 16490, + "teacher_loss": 0.20114298164844513 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.7977844476699829, + "learning_rate": 1.6812559500417945e-05, + "loss": 0.2862, + "step": 16491, + "teacher_loss": 0.22936393320560455 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.7897177934646606, + "learning_rate": 1.6810304872876327e-05, + "loss": 0.5354, + "step": 16492, + "teacher_loss": 0.507138729095459 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.22917184233665466, + "learning_rate": 1.6808050203829845e-05, + "loss": 0.192, + "step": 16493, + "teacher_loss": 0.18786287307739258 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.5464550256729126, + "learning_rate": 1.6805795493330197e-05, + "loss": 0.2946, + "step": 16494, + "teacher_loss": 0.2665936350822449 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.23428234457969666, + "learning_rate": 1.6803540741429073e-05, + "loss": 0.1962, + "step": 16495, + "teacher_loss": 0.19198592007160187 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.29213082790374756, + "learning_rate": 1.6801285948178165e-05, + "loss": 0.2206, + "step": 16496, + "teacher_loss": 0.21268031001091003 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.33765098452568054, + "learning_rate": 1.679903111362917e-05, + "loss": 0.3796, + "step": 16497, + "teacher_loss": 0.3842237591743469 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.3511248528957367, + "learning_rate": 1.6796776237833783e-05, + "loss": 0.2218, + "step": 16498, + "teacher_loss": 0.20748554170131683 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.49120283126831055, + "learning_rate": 1.6794521320843708e-05, + "loss": 0.2419, + "step": 16499, + "teacher_loss": 0.2142144739627838 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.5249272584915161, + "learning_rate": 1.6792266362710637e-05, + "loss": 0.2947, + "step": 16500, + "teacher_loss": 0.2690713405609131 + }, + { + "epoch": 2.98, + "eval_exact_match": 80.07568590350047, + "eval_f1": 87.3856394382346, + "step": 16500 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.3857632279396057, + "learning_rate": 1.6790011363486273e-05, + "loss": 0.3363, + "step": 16501, + "teacher_loss": 0.3307896554470062 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.8408334255218506, + "learning_rate": 1.6787756323222316e-05, + "loss": 0.2703, + "step": 16502, + "teacher_loss": 0.20691636204719543 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.581544041633606, + "learning_rate": 1.6785501241970465e-05, + "loss": 0.2262, + "step": 16503, + "teacher_loss": 0.18675866723060608 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.3728638291358948, + "learning_rate": 1.678324611978242e-05, + "loss": 0.1937, + "step": 16504, + "teacher_loss": 0.17378714680671692 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.28760457038879395, + "learning_rate": 1.6780990956709897e-05, + "loss": 0.2829, + "step": 16505, + "teacher_loss": 0.2823658585548401 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.34556347131729126, + "learning_rate": 1.6778735752804586e-05, + "loss": 0.2727, + "step": 16506, + "teacher_loss": 0.26455825567245483 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.6825772523880005, + "learning_rate": 1.67764805081182e-05, + "loss": 0.286, + "step": 16507, + "teacher_loss": 0.24195297062397003 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.429084450006485, + "learning_rate": 1.677422522270244e-05, + "loss": 0.4254, + "step": 16508, + "teacher_loss": 0.425011545419693 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.5202856659889221, + "learning_rate": 1.677196989660901e-05, + "loss": 0.2063, + "step": 16509, + "teacher_loss": 0.17143088579177856 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.3685051202774048, + "learning_rate": 1.676971452988963e-05, + "loss": 0.3815, + "step": 16510, + "teacher_loss": 0.38297921419143677 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.5092839002609253, + "learning_rate": 1.6767459122595998e-05, + "loss": 0.1889, + "step": 16511, + "teacher_loss": 0.15328705310821533 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 1.247352957725525, + "learning_rate": 1.676520367477983e-05, + "loss": 0.3729, + "step": 16512, + "teacher_loss": 0.27576154470443726 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.3702830672264099, + "learning_rate": 1.6762948186492836e-05, + "loss": 0.2241, + "step": 16513, + "teacher_loss": 0.20789921283721924 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.7963496446609497, + "learning_rate": 1.6760692657786717e-05, + "loss": 0.3029, + "step": 16514, + "teacher_loss": 0.2480759620666504 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.24863898754119873, + "learning_rate": 1.67584370887132e-05, + "loss": 0.2135, + "step": 16515, + "teacher_loss": 0.2095625102519989 + }, + { + "compression_loss": 0.0, + "epoch": 2.98, + "label_loss": 0.3384447693824768, + "learning_rate": 1.6756181479323994e-05, + "loss": 0.2206, + "step": 16516, + "teacher_loss": 0.20747318863868713 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.745774507522583, + "learning_rate": 1.675392582967081e-05, + "loss": 0.3068, + "step": 16517, + "teacher_loss": 0.25808006525039673 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.32220590114593506, + "learning_rate": 1.6751670139805365e-05, + "loss": 0.1803, + "step": 16518, + "teacher_loss": 0.16451692581176758 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.5340006351470947, + "learning_rate": 1.6749414409779372e-05, + "loss": 0.2666, + "step": 16519, + "teacher_loss": 0.23688694834709167 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.4929790198802948, + "learning_rate": 1.674715863964456e-05, + "loss": 0.3195, + "step": 16520, + "teacher_loss": 0.300197958946228 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.38084110617637634, + "learning_rate": 1.6744902829452634e-05, + "loss": 0.2157, + "step": 16521, + "teacher_loss": 0.1973927617073059 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.4732380509376526, + "learning_rate": 1.674264697925532e-05, + "loss": 0.2229, + "step": 16522, + "teacher_loss": 0.195109024643898 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 1.4688782691955566, + "learning_rate": 1.674039108910433e-05, + "loss": 0.4256, + "step": 16523, + "teacher_loss": 0.30963513255119324 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.4075910449028015, + "learning_rate": 1.6738135159051392e-05, + "loss": 0.1982, + "step": 16524, + "teacher_loss": 0.17494887113571167 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 1.04481840133667, + "learning_rate": 1.673587918914823e-05, + "loss": 0.3296, + "step": 16525, + "teacher_loss": 0.25009918212890625 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.31203579902648926, + "learning_rate": 1.673362317944656e-05, + "loss": 0.2834, + "step": 16526, + "teacher_loss": 0.28017544746398926 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.2082904428243637, + "learning_rate": 1.673136712999811e-05, + "loss": 0.2383, + "step": 16527, + "teacher_loss": 0.24165067076683044 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.7093226909637451, + "learning_rate": 1.6729111040854597e-05, + "loss": 0.3567, + "step": 16528, + "teacher_loss": 0.31753838062286377 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.4865039587020874, + "learning_rate": 1.672685491206776e-05, + "loss": 0.2373, + "step": 16529, + "teacher_loss": 0.2096029818058014 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.58197021484375, + "learning_rate": 1.6724598743689314e-05, + "loss": 0.3135, + "step": 16530, + "teacher_loss": 0.2836433947086334 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.6645232439041138, + "learning_rate": 1.6722342535770984e-05, + "loss": 0.4378, + "step": 16531, + "teacher_loss": 0.4126243591308594 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.9061720371246338, + "learning_rate": 1.672008628836451e-05, + "loss": 0.3688, + "step": 16532, + "teacher_loss": 0.3090691566467285 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.4916762709617615, + "learning_rate": 1.6717830001521613e-05, + "loss": 0.3221, + "step": 16533, + "teacher_loss": 0.30325937271118164 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 1.0665887594223022, + "learning_rate": 1.671557367529402e-05, + "loss": 0.395, + "step": 16534, + "teacher_loss": 0.3203321099281311 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.7706929445266724, + "learning_rate": 1.6713317309733475e-05, + "loss": 0.2187, + "step": 16535, + "teacher_loss": 0.15739917755126953 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.5842939615249634, + "learning_rate": 1.6711060904891695e-05, + "loss": 0.3261, + "step": 16536, + "teacher_loss": 0.297380268573761 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.6443324089050293, + "learning_rate": 1.670880446082042e-05, + "loss": 0.3627, + "step": 16537, + "teacher_loss": 0.33142781257629395 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.5103682279586792, + "learning_rate": 1.6706547977571382e-05, + "loss": 0.2864, + "step": 16538, + "teacher_loss": 0.2615398168563843 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.3172757923603058, + "learning_rate": 1.6704291455196313e-05, + "loss": 0.2344, + "step": 16539, + "teacher_loss": 0.22523006796836853 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.3370593786239624, + "learning_rate": 1.6702034893746953e-05, + "loss": 0.2617, + "step": 16540, + "teacher_loss": 0.25331878662109375 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.14756816625595093, + "learning_rate": 1.6699778293275033e-05, + "loss": 0.211, + "step": 16541, + "teacher_loss": 0.2180483639240265 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.5081421732902527, + "learning_rate": 1.6697521653832296e-05, + "loss": 0.4599, + "step": 16542, + "teacher_loss": 0.4545362889766693 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.26985251903533936, + "learning_rate": 1.6695264975470478e-05, + "loss": 0.1717, + "step": 16543, + "teacher_loss": 0.16077929735183716 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.4301908612251282, + "learning_rate": 1.6693008258241314e-05, + "loss": 0.2931, + "step": 16544, + "teacher_loss": 0.2778944969177246 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.32233282923698425, + "learning_rate": 1.669075150219655e-05, + "loss": 0.2474, + "step": 16545, + "teacher_loss": 0.23904860019683838 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.6680991053581238, + "learning_rate": 1.668849470738792e-05, + "loss": 0.2547, + "step": 16546, + "teacher_loss": 0.20872417092323303 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.2690372169017792, + "learning_rate": 1.6686237873867172e-05, + "loss": 0.2655, + "step": 16547, + "teacher_loss": 0.2651504874229431 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.840910017490387, + "learning_rate": 1.6683981001686042e-05, + "loss": 0.3049, + "step": 16548, + "teacher_loss": 0.2453339844942093 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.45750534534454346, + "learning_rate": 1.6681724090896273e-05, + "loss": 0.182, + "step": 16549, + "teacher_loss": 0.1514366865158081 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.5426023006439209, + "learning_rate": 1.667946714154962e-05, + "loss": 0.3178, + "step": 16550, + "teacher_loss": 0.2927940785884857 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.26622992753982544, + "learning_rate": 1.6677210153697815e-05, + "loss": 0.1899, + "step": 16551, + "teacher_loss": 0.1814207285642624 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.39546990394592285, + "learning_rate": 1.6674953127392617e-05, + "loss": 0.3249, + "step": 16552, + "teacher_loss": 0.3171122074127197 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.31601548194885254, + "learning_rate": 1.6672696062685757e-05, + "loss": 0.2023, + "step": 16553, + "teacher_loss": 0.18966734409332275 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.7636637091636658, + "learning_rate": 1.6670438959628996e-05, + "loss": 0.3412, + "step": 16554, + "teacher_loss": 0.2942391633987427 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.6111501455307007, + "learning_rate": 1.6668181818274077e-05, + "loss": 0.3175, + "step": 16555, + "teacher_loss": 0.2848797142505646 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.704237163066864, + "learning_rate": 1.6665924638672747e-05, + "loss": 0.3498, + "step": 16556, + "teacher_loss": 0.31045907735824585 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.44039326906204224, + "learning_rate": 1.6663667420876767e-05, + "loss": 0.211, + "step": 16557, + "teacher_loss": 0.18547756969928741 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.624261200428009, + "learning_rate": 1.6661410164937874e-05, + "loss": 0.3824, + "step": 16558, + "teacher_loss": 0.3555159568786621 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.2920651137828827, + "learning_rate": 1.6659152870907835e-05, + "loss": 0.1634, + "step": 16559, + "teacher_loss": 0.14914312958717346 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.2501271963119507, + "learning_rate": 1.665689553883839e-05, + "loss": 0.2149, + "step": 16560, + "teacher_loss": 0.21100470423698425 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.1721057891845703, + "learning_rate": 1.6654638168781295e-05, + "loss": 0.1487, + "step": 16561, + "teacher_loss": 0.1461189240217209 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.6181367039680481, + "learning_rate": 1.6652380760788312e-05, + "loss": 0.305, + "step": 16562, + "teacher_loss": 0.2702442407608032 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.5905710458755493, + "learning_rate": 1.665012331491119e-05, + "loss": 0.2196, + "step": 16563, + "teacher_loss": 0.17832598090171814 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.4411013126373291, + "learning_rate": 1.6647865831201686e-05, + "loss": 0.213, + "step": 16564, + "teacher_loss": 0.18764615058898926 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.11961507052183151, + "learning_rate": 1.664560830971157e-05, + "loss": 0.165, + "step": 16565, + "teacher_loss": 0.17003558576107025 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.7063257694244385, + "learning_rate": 1.6643350750492578e-05, + "loss": 0.2899, + "step": 16566, + "teacher_loss": 0.2436167150735855 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.3910992741584778, + "learning_rate": 1.6641093153596482e-05, + "loss": 0.3091, + "step": 16567, + "teacher_loss": 0.299952894449234 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.5786798596382141, + "learning_rate": 1.6638835519075044e-05, + "loss": 0.2595, + "step": 16568, + "teacher_loss": 0.22404122352600098 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.3451366424560547, + "learning_rate": 1.6636577846980014e-05, + "loss": 0.2421, + "step": 16569, + "teacher_loss": 0.23060841858386993 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.4296899735927582, + "learning_rate": 1.663432013736317e-05, + "loss": 0.2129, + "step": 16570, + "teacher_loss": 0.18877197802066803 + }, + { + "compression_loss": 0.0, + "epoch": 2.99, + "label_loss": 0.26885688304901123, + "learning_rate": 1.663206239027626e-05, + "loss": 0.2417, + "step": 16571, + "teacher_loss": 0.23864570260047913 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.34053635597229004, + "learning_rate": 1.662980460577105e-05, + "loss": 0.2715, + "step": 16572, + "teacher_loss": 0.2638576030731201 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.5596696734428406, + "learning_rate": 1.662754678389931e-05, + "loss": 0.3635, + "step": 16573, + "teacher_loss": 0.3417547345161438 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.45566555857658386, + "learning_rate": 1.6625288924712803e-05, + "loss": 0.2182, + "step": 16574, + "teacher_loss": 0.19183024764060974 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.3131885528564453, + "learning_rate": 1.6623031028263292e-05, + "loss": 0.2156, + "step": 16575, + "teacher_loss": 0.20480357110500336 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.6385051012039185, + "learning_rate": 1.6620773094602546e-05, + "loss": 0.3175, + "step": 16576, + "teacher_loss": 0.28179389238357544 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 1.177838921546936, + "learning_rate": 1.6618515123782337e-05, + "loss": 0.2932, + "step": 16577, + "teacher_loss": 0.1949024796485901 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.2383987307548523, + "learning_rate": 1.6616257115854423e-05, + "loss": 0.2055, + "step": 16578, + "teacher_loss": 0.2017991840839386 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.38498571515083313, + "learning_rate": 1.6613999070870577e-05, + "loss": 0.1875, + "step": 16579, + "teacher_loss": 0.16560658812522888 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.6439411640167236, + "learning_rate": 1.6611740988882575e-05, + "loss": 0.2697, + "step": 16580, + "teacher_loss": 0.22809717059135437 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.42243415117263794, + "learning_rate": 1.6609482869942185e-05, + "loss": 0.2345, + "step": 16581, + "teacher_loss": 0.21357378363609314 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.20221224427223206, + "learning_rate": 1.6607224714101177e-05, + "loss": 0.2179, + "step": 16582, + "teacher_loss": 0.21959705650806427 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.4948495626449585, + "learning_rate": 1.6604966521411325e-05, + "loss": 0.247, + "step": 16583, + "teacher_loss": 0.21945181488990784 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.4590347409248352, + "learning_rate": 1.66027082919244e-05, + "loss": 0.3248, + "step": 16584, + "teacher_loss": 0.3099316358566284 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.3514411449432373, + "learning_rate": 1.6600450025692184e-05, + "loss": 0.243, + "step": 16585, + "teacher_loss": 0.23097622394561768 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.35198014974594116, + "learning_rate": 1.6598191722766443e-05, + "loss": 0.216, + "step": 16586, + "teacher_loss": 0.20091629028320312 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.38897255063056946, + "learning_rate": 1.659593338319896e-05, + "loss": 0.2005, + "step": 16587, + "teacher_loss": 0.17952017486095428 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.7840558290481567, + "learning_rate": 1.6593675007041512e-05, + "loss": 0.3539, + "step": 16588, + "teacher_loss": 0.3061090111732483 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.7565650939941406, + "learning_rate": 1.659141659434587e-05, + "loss": 0.344, + "step": 16589, + "teacher_loss": 0.2981935143470764 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.39933091402053833, + "learning_rate": 1.658915814516382e-05, + "loss": 0.1869, + "step": 16590, + "teacher_loss": 0.1632416546344757 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.38342469930648804, + "learning_rate": 1.6586899659547137e-05, + "loss": 0.3322, + "step": 16591, + "teacher_loss": 0.3265453577041626 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.2905828058719635, + "learning_rate": 1.6584641137547603e-05, + "loss": 0.1981, + "step": 16592, + "teacher_loss": 0.18785709142684937 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.5319105386734009, + "learning_rate": 1.6582382579216996e-05, + "loss": 0.3245, + "step": 16593, + "teacher_loss": 0.3014667332172394 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.5163998007774353, + "learning_rate": 1.65801239846071e-05, + "loss": 0.1892, + "step": 16594, + "teacher_loss": 0.15287858247756958 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.20453672111034393, + "learning_rate": 1.6577865353769704e-05, + "loss": 0.1676, + "step": 16595, + "teacher_loss": 0.1634896695613861 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.7812080383300781, + "learning_rate": 1.657560668675658e-05, + "loss": 0.2918, + "step": 16596, + "teacher_loss": 0.2374630868434906 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.29870760440826416, + "learning_rate": 1.6573347983619522e-05, + "loss": 0.2748, + "step": 16597, + "teacher_loss": 0.2721807062625885 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 1.1312963962554932, + "learning_rate": 1.657108924441031e-05, + "loss": 0.702, + "step": 16598, + "teacher_loss": 0.6543383598327637 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 1.1894670724868774, + "learning_rate": 1.6568830469180733e-05, + "loss": 0.2987, + "step": 16599, + "teacher_loss": 0.19973570108413696 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.5973784327507019, + "learning_rate": 1.6566571657982582e-05, + "loss": 0.3253, + "step": 16600, + "teacher_loss": 0.29509496688842773 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.17872895300388336, + "learning_rate": 1.6564312810867635e-05, + "loss": 0.1739, + "step": 16601, + "teacher_loss": 0.17339558899402618 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.5491242408752441, + "learning_rate": 1.656205392788768e-05, + "loss": 0.2078, + "step": 16602, + "teacher_loss": 0.16988155245780945 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.7075532078742981, + "learning_rate": 1.655979500909452e-05, + "loss": 0.3171, + "step": 16603, + "teacher_loss": 0.2736976742744446 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.3452551066875458, + "learning_rate": 1.6557536054539937e-05, + "loss": 0.2116, + "step": 16604, + "teacher_loss": 0.19679176807403564 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.3967088758945465, + "learning_rate": 1.6555277064275717e-05, + "loss": 0.1871, + "step": 16605, + "teacher_loss": 0.16386398673057556 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.31807512044906616, + "learning_rate": 1.655301803835366e-05, + "loss": 0.2361, + "step": 16606, + "teacher_loss": 0.22699615359306335 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.7111173272132874, + "learning_rate": 1.655075897682555e-05, + "loss": 0.2677, + "step": 16607, + "teacher_loss": 0.21840360760688782 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.39180633425712585, + "learning_rate": 1.654849987974319e-05, + "loss": 0.2437, + "step": 16608, + "teacher_loss": 0.22723454236984253 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.3105844259262085, + "learning_rate": 1.6546240747158375e-05, + "loss": 0.1985, + "step": 16609, + "teacher_loss": 0.1860843449831009 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.6416542530059814, + "learning_rate": 1.6543981579122895e-05, + "loss": 0.3021, + "step": 16610, + "teacher_loss": 0.2644268274307251 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.2533203959465027, + "learning_rate": 1.654172237568854e-05, + "loss": 0.174, + "step": 16611, + "teacher_loss": 0.1651584804058075 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.36402270197868347, + "learning_rate": 1.6539463136907116e-05, + "loss": 0.1866, + "step": 16612, + "teacher_loss": 0.1668454110622406 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.4679250717163086, + "learning_rate": 1.6537203862830416e-05, + "loss": 0.2869, + "step": 16613, + "teacher_loss": 0.26680639386177063 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.35215193033218384, + "learning_rate": 1.6534944553510244e-05, + "loss": 0.2226, + "step": 16614, + "teacher_loss": 0.20815637707710266 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.6027519702911377, + "learning_rate": 1.6532685208998398e-05, + "loss": 0.2986, + "step": 16615, + "teacher_loss": 0.26476943492889404 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.794066309928894, + "learning_rate": 1.6530425829346667e-05, + "loss": 0.3167, + "step": 16616, + "teacher_loss": 0.26363813877105713 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.3358103930950165, + "learning_rate": 1.6528166414606862e-05, + "loss": 0.1929, + "step": 16617, + "teacher_loss": 0.17706041038036346 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.3245517611503601, + "learning_rate": 1.652590696483079e-05, + "loss": 0.2363, + "step": 16618, + "teacher_loss": 0.2265392392873764 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.3604736924171448, + "learning_rate": 1.6523647480070235e-05, + "loss": 0.2081, + "step": 16619, + "teacher_loss": 0.19117727875709534 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.48979148268699646, + "learning_rate": 1.6521387960377023e-05, + "loss": 0.2568, + "step": 16620, + "teacher_loss": 0.23089753091335297 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.39932042360305786, + "learning_rate": 1.6519128405802937e-05, + "loss": 0.2302, + "step": 16621, + "teacher_loss": 0.21137796342372894 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.5675336122512817, + "learning_rate": 1.6516868816399798e-05, + "loss": 0.2342, + "step": 16622, + "teacher_loss": 0.19711177051067352 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.42040157318115234, + "learning_rate": 1.6514609192219403e-05, + "loss": 0.2137, + "step": 16623, + "teacher_loss": 0.19070178270339966 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.6413109302520752, + "learning_rate": 1.6512349533313555e-05, + "loss": 0.4367, + "step": 16624, + "teacher_loss": 0.4140129089355469 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.2512364089488983, + "learning_rate": 1.6510089839734078e-05, + "loss": 0.2013, + "step": 16625, + "teacher_loss": 0.195703387260437 + }, + { + "compression_loss": 0.0, + "epoch": 3.0, + "label_loss": 0.2540172040462494, + "learning_rate": 1.6507830111532755e-05, + "loss": 0.1943, + "step": 16626, + "teacher_loss": 0.18768110871315002 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.8628928661346436, + "learning_rate": 1.6505570348761413e-05, + "loss": 0.2209, + "step": 16627, + "teacher_loss": 0.14959722757339478 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.4223070740699768, + "learning_rate": 1.650331055147186e-05, + "loss": 0.2665, + "step": 16628, + "teacher_loss": 0.24918031692504883 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.29805606603622437, + "learning_rate": 1.6501050719715903e-05, + "loss": 0.2407, + "step": 16629, + "teacher_loss": 0.23434260487556458 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.5205034017562866, + "learning_rate": 1.6498790853545355e-05, + "loss": 0.2311, + "step": 16630, + "teacher_loss": 0.19898566603660583 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.3485151529312134, + "learning_rate": 1.649653095301202e-05, + "loss": 0.2221, + "step": 16631, + "teacher_loss": 0.20803453028202057 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.45519545674324036, + "learning_rate": 1.649427101816772e-05, + "loss": 0.2474, + "step": 16632, + "teacher_loss": 0.22427037358283997 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.42452654242515564, + "learning_rate": 1.649201104906427e-05, + "loss": 0.2039, + "step": 16633, + "teacher_loss": 0.17936545610427856 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.36761289834976196, + "learning_rate": 1.6489751045753472e-05, + "loss": 0.2311, + "step": 16634, + "teacher_loss": 0.21589171886444092 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.584121823310852, + "learning_rate": 1.6487491008287157e-05, + "loss": 0.2303, + "step": 16635, + "teacher_loss": 0.19100965559482574 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.28541135787963867, + "learning_rate": 1.6485230936717126e-05, + "loss": 0.1685, + "step": 16636, + "teacher_loss": 0.15548065304756165 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 1.1317944526672363, + "learning_rate": 1.6482970831095205e-05, + "loss": 0.2688, + "step": 16637, + "teacher_loss": 0.17290166020393372 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.5264564752578735, + "learning_rate": 1.648071069147321e-05, + "loss": 0.2745, + "step": 16638, + "teacher_loss": 0.24655510485172272 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.0864419937133789, + "learning_rate": 1.647845051790296e-05, + "loss": 0.146, + "step": 16639, + "teacher_loss": 0.15266850590705872 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.47381970286369324, + "learning_rate": 1.6476190310436267e-05, + "loss": 0.1876, + "step": 16640, + "teacher_loss": 0.15581119060516357 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.35481536388397217, + "learning_rate": 1.647393006912496e-05, + "loss": 0.2224, + "step": 16641, + "teacher_loss": 0.20765304565429688 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.2816130220890045, + "learning_rate": 1.6471669794020854e-05, + "loss": 0.2777, + "step": 16642, + "teacher_loss": 0.27730390429496765 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.4508807063102722, + "learning_rate": 1.6469409485175773e-05, + "loss": 0.2112, + "step": 16643, + "teacher_loss": 0.18454337120056152 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.18374405801296234, + "learning_rate": 1.646714914264154e-05, + "loss": 0.1827, + "step": 16644, + "teacher_loss": 0.18259277939796448 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.2373037487268448, + "learning_rate": 1.6464888766469973e-05, + "loss": 0.2039, + "step": 16645, + "teacher_loss": 0.20019501447677612 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.6330631971359253, + "learning_rate": 1.6462628356712896e-05, + "loss": 0.4479, + "step": 16646, + "teacher_loss": 0.42734837532043457 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.1081903874874115, + "learning_rate": 1.646036791342214e-05, + "loss": 0.1911, + "step": 16647, + "teacher_loss": 0.2003670483827591 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.3166697919368744, + "learning_rate": 1.6458107436649526e-05, + "loss": 0.2029, + "step": 16648, + "teacher_loss": 0.19026033580303192 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.33273324370384216, + "learning_rate": 1.6455846926446875e-05, + "loss": 0.2115, + "step": 16649, + "teacher_loss": 0.19801700115203857 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.30150771141052246, + "learning_rate": 1.645358638286603e-05, + "loss": 0.2404, + "step": 16650, + "teacher_loss": 0.23362015187740326 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.983533501625061, + "learning_rate": 1.6451325805958796e-05, + "loss": 0.4752, + "step": 16651, + "teacher_loss": 0.41876423358917236 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.44914886355400085, + "learning_rate": 1.6449065195777018e-05, + "loss": 0.2536, + "step": 16652, + "teacher_loss": 0.23183780908584595 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.5146439671516418, + "learning_rate": 1.644680455237252e-05, + "loss": 0.2204, + "step": 16653, + "teacher_loss": 0.18770773708820343 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.1629735827445984, + "learning_rate": 1.6444543875797128e-05, + "loss": 0.1417, + "step": 16654, + "teacher_loss": 0.1393834948539734 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.881411612033844, + "learning_rate": 1.6442283166102677e-05, + "loss": 0.4749, + "step": 16655, + "teacher_loss": 0.42971351742744446 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.4598774313926697, + "learning_rate": 1.6440022423340998e-05, + "loss": 0.2739, + "step": 16656, + "teacher_loss": 0.2532699704170227 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.9130034446716309, + "learning_rate": 1.643776164756392e-05, + "loss": 0.2743, + "step": 16657, + "teacher_loss": 0.20332211256027222 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.8764932155609131, + "learning_rate": 1.6435500838823286e-05, + "loss": 0.6019, + "step": 16658, + "teacher_loss": 0.5714399814605713 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.504037618637085, + "learning_rate": 1.6433239997170913e-05, + "loss": 0.2176, + "step": 16659, + "teacher_loss": 0.18580546975135803 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.4445747137069702, + "learning_rate": 1.6430979122658646e-05, + "loss": 0.3894, + "step": 16660, + "teacher_loss": 0.3832918405532837 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.8176935911178589, + "learning_rate": 1.6428718215338323e-05, + "loss": 0.3249, + "step": 16661, + "teacher_loss": 0.2701180875301361 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.3848899006843567, + "learning_rate": 1.6426457275261766e-05, + "loss": 0.2108, + "step": 16662, + "teacher_loss": 0.19150319695472717 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.8961437344551086, + "learning_rate": 1.642419630248083e-05, + "loss": 0.3663, + "step": 16663, + "teacher_loss": 0.307392954826355 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.32402679324150085, + "learning_rate": 1.6421935297047335e-05, + "loss": 0.1916, + "step": 16664, + "teacher_loss": 0.17685265839099884 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.5209680795669556, + "learning_rate": 1.641967425901313e-05, + "loss": 0.2905, + "step": 16665, + "teacher_loss": 0.26488834619522095 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.6518914699554443, + "learning_rate": 1.641741318843005e-05, + "loss": 0.3479, + "step": 16666, + "teacher_loss": 0.31413114070892334 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.2759491503238678, + "learning_rate": 1.6415152085349934e-05, + "loss": 0.2048, + "step": 16667, + "teacher_loss": 0.1968681514263153 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.42380067706108093, + "learning_rate": 1.641289094982463e-05, + "loss": 0.2665, + "step": 16668, + "teacher_loss": 0.24907664954662323 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.8824596405029297, + "learning_rate": 1.6410629781905964e-05, + "loss": 0.2831, + "step": 16669, + "teacher_loss": 0.21654212474822998 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.4889693856239319, + "learning_rate": 1.640836858164579e-05, + "loss": 0.2695, + "step": 16670, + "teacher_loss": 0.24510234594345093 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.36210858821868896, + "learning_rate": 1.6406107349095943e-05, + "loss": 0.1841, + "step": 16671, + "teacher_loss": 0.16429701447486877 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.3743099272251129, + "learning_rate": 1.640384608430828e-05, + "loss": 0.1896, + "step": 16672, + "teacher_loss": 0.16910046339035034 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.8900965452194214, + "learning_rate": 1.640158478733463e-05, + "loss": 0.2885, + "step": 16673, + "teacher_loss": 0.22168835997581482 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.6605490446090698, + "learning_rate": 1.6399323458226844e-05, + "loss": 0.4708, + "step": 16674, + "teacher_loss": 0.4496930241584778 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.2045832872390747, + "learning_rate": 1.6397062097036764e-05, + "loss": 0.1874, + "step": 16675, + "teacher_loss": 0.18551163375377655 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.546154797077179, + "learning_rate": 1.6394800703816238e-05, + "loss": 0.2874, + "step": 16676, + "teacher_loss": 0.2586747705936432 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.4462851285934448, + "learning_rate": 1.6392539278617115e-05, + "loss": 0.2188, + "step": 16677, + "teacher_loss": 0.19351482391357422 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.539980411529541, + "learning_rate": 1.6390277821491243e-05, + "loss": 0.2541, + "step": 16678, + "teacher_loss": 0.2223459631204605 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.1386830359697342, + "learning_rate": 1.6388016332490464e-05, + "loss": 0.1724, + "step": 16679, + "teacher_loss": 0.17617622017860413 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.5103582739830017, + "learning_rate": 1.6385754811666637e-05, + "loss": 0.2021, + "step": 16680, + "teacher_loss": 0.1678774654865265 + }, + { + "compression_loss": 0.0, + "epoch": 3.01, + "label_loss": 0.6613031625747681, + "learning_rate": 1.6383493259071607e-05, + "loss": 0.2522, + "step": 16681, + "teacher_loss": 0.20675159990787506 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.22267544269561768, + "learning_rate": 1.6381231674757223e-05, + "loss": 0.1813, + "step": 16682, + "teacher_loss": 0.1767527461051941 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.7743809819221497, + "learning_rate": 1.637897005877534e-05, + "loss": 0.3101, + "step": 16683, + "teacher_loss": 0.2585359811782837 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.41415929794311523, + "learning_rate": 1.63767084111778e-05, + "loss": 0.2023, + "step": 16684, + "teacher_loss": 0.17875057458877563 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.7254762649536133, + "learning_rate": 1.637444673201647e-05, + "loss": 0.5528, + "step": 16685, + "teacher_loss": 0.5336033701896667 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.26265889406204224, + "learning_rate": 1.63721850213432e-05, + "loss": 0.3658, + "step": 16686, + "teacher_loss": 0.3772457242012024 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.22453008592128754, + "learning_rate": 1.6369923279209835e-05, + "loss": 0.1614, + "step": 16687, + "teacher_loss": 0.15439260005950928 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.4338299036026001, + "learning_rate": 1.6367661505668243e-05, + "loss": 0.2663, + "step": 16688, + "teacher_loss": 0.24764175713062286 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.7031207084655762, + "learning_rate": 1.6365399700770267e-05, + "loss": 0.6474, + "step": 16689, + "teacher_loss": 0.6412497758865356 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 1.307365894317627, + "learning_rate": 1.6363137864567773e-05, + "loss": 0.349, + "step": 16690, + "teacher_loss": 0.24253278970718384 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.24011573195457458, + "learning_rate": 1.6360875997112616e-05, + "loss": 0.1558, + "step": 16691, + "teacher_loss": 0.14643266797065735 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.7663685083389282, + "learning_rate": 1.6358614098456648e-05, + "loss": 0.2641, + "step": 16692, + "teacher_loss": 0.2083219289779663 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.398048460483551, + "learning_rate": 1.6356352168651738e-05, + "loss": 0.2141, + "step": 16693, + "teacher_loss": 0.19367438554763794 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.4373455047607422, + "learning_rate": 1.635409020774973e-05, + "loss": 0.3676, + "step": 16694, + "teacher_loss": 0.35987141728401184 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.3498024344444275, + "learning_rate": 1.6351828215802502e-05, + "loss": 0.1768, + "step": 16695, + "teacher_loss": 0.15755532681941986 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.3348073959350586, + "learning_rate": 1.6349566192861905e-05, + "loss": 0.1637, + "step": 16696, + "teacher_loss": 0.14464391767978668 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.30063748359680176, + "learning_rate": 1.6347304138979797e-05, + "loss": 0.223, + "step": 16697, + "teacher_loss": 0.21433916687965393 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.3812759220600128, + "learning_rate": 1.6345042054208047e-05, + "loss": 0.1913, + "step": 16698, + "teacher_loss": 0.17021194100379944 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 1.1720621585845947, + "learning_rate": 1.6342779938598518e-05, + "loss": 0.5735, + "step": 16699, + "teacher_loss": 0.5069878101348877 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.17568276822566986, + "learning_rate": 1.634051779220307e-05, + "loss": 0.1342, + "step": 16700, + "teacher_loss": 0.12953686714172363 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.1343419849872589, + "learning_rate": 1.633825561507356e-05, + "loss": 0.3337, + "step": 16701, + "teacher_loss": 0.35584571957588196 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.32095134258270264, + "learning_rate": 1.633599340726187e-05, + "loss": 0.2426, + "step": 16702, + "teacher_loss": 0.2338457703590393 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.8260831236839294, + "learning_rate": 1.6333731168819854e-05, + "loss": 0.3043, + "step": 16703, + "teacher_loss": 0.24637362360954285 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.23801273107528687, + "learning_rate": 1.6331468899799383e-05, + "loss": 0.2445, + "step": 16704, + "teacher_loss": 0.24520312249660492 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.24387332797050476, + "learning_rate": 1.6329206600252316e-05, + "loss": 0.2484, + "step": 16705, + "teacher_loss": 0.2489512711763382 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.739294171333313, + "learning_rate": 1.6326944270230532e-05, + "loss": 0.7563, + "step": 16706, + "teacher_loss": 0.7582250833511353 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.802885890007019, + "learning_rate": 1.6324681909785896e-05, + "loss": 0.5456, + "step": 16707, + "teacher_loss": 0.5170537829399109 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.21120405197143555, + "learning_rate": 1.632241951897027e-05, + "loss": 0.1691, + "step": 16708, + "teacher_loss": 0.16439834237098694 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.23087695240974426, + "learning_rate": 1.6320157097835533e-05, + "loss": 0.2062, + "step": 16709, + "teacher_loss": 0.20350268483161926 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.4218827486038208, + "learning_rate": 1.631789464643355e-05, + "loss": 0.234, + "step": 16710, + "teacher_loss": 0.21312332153320312 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.5379481315612793, + "learning_rate": 1.6315632164816197e-05, + "loss": 0.3182, + "step": 16711, + "teacher_loss": 0.29373788833618164 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.7547663450241089, + "learning_rate": 1.631336965303534e-05, + "loss": 0.2025, + "step": 16712, + "teacher_loss": 0.14108511805534363 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.295688271522522, + "learning_rate": 1.6311107111142855e-05, + "loss": 0.2818, + "step": 16713, + "teacher_loss": 0.28027287125587463 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.35632050037384033, + "learning_rate": 1.6308844539190612e-05, + "loss": 0.2214, + "step": 16714, + "teacher_loss": 0.2064087688922882 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.5817666053771973, + "learning_rate": 1.6306581937230493e-05, + "loss": 0.2431, + "step": 16715, + "teacher_loss": 0.20552444458007812 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.3169645667076111, + "learning_rate": 1.6304319305314365e-05, + "loss": 0.1998, + "step": 16716, + "teacher_loss": 0.1867600828409195 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.5003454685211182, + "learning_rate": 1.6302056643494105e-05, + "loss": 0.2467, + "step": 16717, + "teacher_loss": 0.21850891411304474 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.5456697940826416, + "learning_rate": 1.6299793951821596e-05, + "loss": 0.2641, + "step": 16718, + "teacher_loss": 0.23284760117530823 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.536749005317688, + "learning_rate": 1.62975312303487e-05, + "loss": 0.2572, + "step": 16719, + "teacher_loss": 0.22614958882331848 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 1.1537868976593018, + "learning_rate": 1.629526847912731e-05, + "loss": 0.4821, + "step": 16720, + "teacher_loss": 0.4074448347091675 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.25004416704177856, + "learning_rate": 1.629300569820929e-05, + "loss": 0.2943, + "step": 16721, + "teacher_loss": 0.29925674200057983 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.2055758237838745, + "learning_rate": 1.6290742887646532e-05, + "loss": 0.1891, + "step": 16722, + "teacher_loss": 0.18725544214248657 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.37162208557128906, + "learning_rate": 1.6288480047490908e-05, + "loss": 0.2105, + "step": 16723, + "teacher_loss": 0.19260048866271973 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.9930249452590942, + "learning_rate": 1.62862171777943e-05, + "loss": 0.3492, + "step": 16724, + "teacher_loss": 0.27770185470581055 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.5038953423500061, + "learning_rate": 1.6283954278608587e-05, + "loss": 0.2245, + "step": 16725, + "teacher_loss": 0.19348105788230896 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.25645095109939575, + "learning_rate": 1.628169134998565e-05, + "loss": 0.2623, + "step": 16726, + "teacher_loss": 0.26294687390327454 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.1329130083322525, + "learning_rate": 1.6279428391977377e-05, + "loss": 0.1864, + "step": 16727, + "teacher_loss": 0.19230203330516815 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.6769921779632568, + "learning_rate": 1.6277165404635647e-05, + "loss": 0.2872, + "step": 16728, + "teacher_loss": 0.2438545525074005 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 1.038051962852478, + "learning_rate": 1.627490238801234e-05, + "loss": 0.4215, + "step": 16729, + "teacher_loss": 0.352997362613678 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 1.3730661869049072, + "learning_rate": 1.6272639342159346e-05, + "loss": 0.3072, + "step": 16730, + "teacher_loss": 0.18873238563537598 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.2309224009513855, + "learning_rate": 1.6270376267128544e-05, + "loss": 0.2244, + "step": 16731, + "teacher_loss": 0.22373080253601074 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.5658903121948242, + "learning_rate": 1.6268113162971826e-05, + "loss": 0.3602, + "step": 16732, + "teacher_loss": 0.3373996913433075 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.24809661507606506, + "learning_rate": 1.6265850029741078e-05, + "loss": 0.1915, + "step": 16733, + "teacher_loss": 0.1851608157157898 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.3477705717086792, + "learning_rate": 1.6263586867488183e-05, + "loss": 0.2451, + "step": 16734, + "teacher_loss": 0.233689546585083 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.34659552574157715, + "learning_rate": 1.6261323676265026e-05, + "loss": 0.2363, + "step": 16735, + "teacher_loss": 0.22401948273181915 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.48806464672088623, + "learning_rate": 1.62590604561235e-05, + "loss": 0.2316, + "step": 16736, + "teacher_loss": 0.20304948091506958 + }, + { + "compression_loss": 0.0, + "epoch": 3.02, + "label_loss": 0.25840654969215393, + "learning_rate": 1.6256797207115495e-05, + "loss": 0.3355, + "step": 16737, + "teacher_loss": 0.3440427780151367 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.2350911796092987, + "learning_rate": 1.6254533929292893e-05, + "loss": 0.2471, + "step": 16738, + "teacher_loss": 0.24843406677246094 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.15830382704734802, + "learning_rate": 1.6252270622707592e-05, + "loss": 0.1646, + "step": 16739, + "teacher_loss": 0.1652618646621704 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.4054409861564636, + "learning_rate": 1.625000728741148e-05, + "loss": 0.2331, + "step": 16740, + "teacher_loss": 0.2139054536819458 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.6492743492126465, + "learning_rate": 1.6247743923456452e-05, + "loss": 0.2859, + "step": 16741, + "teacher_loss": 0.24554742872714996 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.9160658717155457, + "learning_rate": 1.624548053089439e-05, + "loss": 0.4797, + "step": 16742, + "teacher_loss": 0.4311636686325073 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.32270270586013794, + "learning_rate": 1.6243217109777202e-05, + "loss": 0.2721, + "step": 16743, + "teacher_loss": 0.26653292775154114 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.2720167338848114, + "learning_rate": 1.6240953660156768e-05, + "loss": 0.251, + "step": 16744, + "teacher_loss": 0.24869826436042786 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.6041932106018066, + "learning_rate": 1.623869018208499e-05, + "loss": 0.2917, + "step": 16745, + "teacher_loss": 0.25694602727890015 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.3712504208087921, + "learning_rate": 1.6236426675613754e-05, + "loss": 0.2623, + "step": 16746, + "teacher_loss": 0.25016146898269653 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.7163546681404114, + "learning_rate": 1.6234163140794965e-05, + "loss": 0.3116, + "step": 16747, + "teacher_loss": 0.26661065220832825 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.5679436922073364, + "learning_rate": 1.623189957768052e-05, + "loss": 0.2821, + "step": 16748, + "teacher_loss": 0.2503289580345154 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.44359537959098816, + "learning_rate": 1.62296359863223e-05, + "loss": 0.2096, + "step": 16749, + "teacher_loss": 0.18361210823059082 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.3506343960762024, + "learning_rate": 1.622737236677222e-05, + "loss": 0.3918, + "step": 16750, + "teacher_loss": 0.39637070894241333 + }, + { + "epoch": 3.03, + "eval_exact_match": 79.6972563859981, + "eval_f1": 87.28700317981962, + "step": 16750 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.2943986654281616, + "learning_rate": 1.6225108719082173e-05, + "loss": 0.3572, + "step": 16751, + "teacher_loss": 0.364205539226532 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.9274237751960754, + "learning_rate": 1.622284504330405e-05, + "loss": 0.4402, + "step": 16752, + "teacher_loss": 0.38603758811950684 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.3011430501937866, + "learning_rate": 1.622058133948976e-05, + "loss": 0.1812, + "step": 16753, + "teacher_loss": 0.16790489852428436 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.5951123237609863, + "learning_rate": 1.6218317607691208e-05, + "loss": 0.2445, + "step": 16754, + "teacher_loss": 0.2055935263633728 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.42148837447166443, + "learning_rate": 1.6216053847960272e-05, + "loss": 0.2174, + "step": 16755, + "teacher_loss": 0.19471558928489685 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.624860405921936, + "learning_rate": 1.621379006034888e-05, + "loss": 0.3436, + "step": 16756, + "teacher_loss": 0.3123078942298889 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.35093092918395996, + "learning_rate": 1.621152624490891e-05, + "loss": 0.2182, + "step": 16757, + "teacher_loss": 0.2034684121608734 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.7728095054626465, + "learning_rate": 1.620926240169228e-05, + "loss": 0.3431, + "step": 16758, + "teacher_loss": 0.2953372597694397 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.4074169099330902, + "learning_rate": 1.6206998530750893e-05, + "loss": 0.1994, + "step": 16759, + "teacher_loss": 0.17628496885299683 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.2834703028202057, + "learning_rate": 1.620473463213664e-05, + "loss": 0.1608, + "step": 16760, + "teacher_loss": 0.14721494913101196 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.40931612253189087, + "learning_rate": 1.6202470705901436e-05, + "loss": 0.2805, + "step": 16761, + "teacher_loss": 0.2662176489830017 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.5422577261924744, + "learning_rate": 1.6200206752097187e-05, + "loss": 0.2985, + "step": 16762, + "teacher_loss": 0.2714230418205261 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.23011064529418945, + "learning_rate": 1.6197942770775795e-05, + "loss": 0.2529, + "step": 16763, + "teacher_loss": 0.25548604130744934 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.3078126907348633, + "learning_rate": 1.6195678761989167e-05, + "loss": 0.3545, + "step": 16764, + "teacher_loss": 0.35966312885284424 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.12039228528738022, + "learning_rate": 1.619341472578921e-05, + "loss": 0.1559, + "step": 16765, + "teacher_loss": 0.15988051891326904 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.7397499084472656, + "learning_rate": 1.619115066222783e-05, + "loss": 0.2494, + "step": 16766, + "teacher_loss": 0.1949431598186493 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.4226497411727905, + "learning_rate": 1.618888657135694e-05, + "loss": 0.3021, + "step": 16767, + "teacher_loss": 0.28872549533843994 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.44641709327697754, + "learning_rate": 1.618662245322844e-05, + "loss": 0.2524, + "step": 16768, + "teacher_loss": 0.23082174360752106 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.3515903055667877, + "learning_rate": 1.6184358307894246e-05, + "loss": 0.2078, + "step": 16769, + "teacher_loss": 0.19177690148353577 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.40109825134277344, + "learning_rate": 1.618209413540627e-05, + "loss": 0.3817, + "step": 16770, + "teacher_loss": 0.3795184791088104 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.8166964054107666, + "learning_rate": 1.6179829935816416e-05, + "loss": 0.2392, + "step": 16771, + "teacher_loss": 0.17506438493728638 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.6074659824371338, + "learning_rate": 1.61775657091766e-05, + "loss": 0.3083, + "step": 16772, + "teacher_loss": 0.2750290036201477 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.35683396458625793, + "learning_rate": 1.617530145553874e-05, + "loss": 0.2194, + "step": 16773, + "teacher_loss": 0.20417499542236328 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.2445635199546814, + "learning_rate": 1.617303717495473e-05, + "loss": 0.1653, + "step": 16774, + "teacher_loss": 0.15646912157535553 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.4458203911781311, + "learning_rate": 1.6170772867476495e-05, + "loss": 0.2147, + "step": 16775, + "teacher_loss": 0.18897312879562378 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.2752021551132202, + "learning_rate": 1.616850853315596e-05, + "loss": 0.2119, + "step": 16776, + "teacher_loss": 0.2049192637205124 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.16564348340034485, + "learning_rate": 1.6166244172045017e-05, + "loss": 0.1907, + "step": 16777, + "teacher_loss": 0.1934380829334259 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.5233674049377441, + "learning_rate": 1.6163979784195594e-05, + "loss": 0.2539, + "step": 16778, + "teacher_loss": 0.22399696707725525 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.34260302782058716, + "learning_rate": 1.6161715369659607e-05, + "loss": 0.2149, + "step": 16779, + "teacher_loss": 0.20074602961540222 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.7575348019599915, + "learning_rate": 1.615945092848897e-05, + "loss": 0.2639, + "step": 16780, + "teacher_loss": 0.20899678766727448 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.18293337523937225, + "learning_rate": 1.61571864607356e-05, + "loss": 0.1578, + "step": 16781, + "teacher_loss": 0.15500061213970184 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.7627553343772888, + "learning_rate": 1.6154921966451407e-05, + "loss": 0.3333, + "step": 16782, + "teacher_loss": 0.28560519218444824 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.3845599293708801, + "learning_rate": 1.615265744568832e-05, + "loss": 0.2519, + "step": 16783, + "teacher_loss": 0.23713265359401703 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.5760377645492554, + "learning_rate": 1.6150392898498258e-05, + "loss": 0.2775, + "step": 16784, + "teacher_loss": 0.24431398510932922 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.4641030430793762, + "learning_rate": 1.6148128324933127e-05, + "loss": 0.2235, + "step": 16785, + "teacher_loss": 0.1967858076095581 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.55330491065979, + "learning_rate": 1.6145863725044864e-05, + "loss": 0.1809, + "step": 16786, + "teacher_loss": 0.13947996497154236 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.5893657207489014, + "learning_rate": 1.6143599098885377e-05, + "loss": 0.3349, + "step": 16787, + "teacher_loss": 0.30662715435028076 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.9667890071868896, + "learning_rate": 1.614133444650659e-05, + "loss": 0.3687, + "step": 16788, + "teacher_loss": 0.30226409435272217 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.32840967178344727, + "learning_rate": 1.613906976796043e-05, + "loss": 0.177, + "step": 16789, + "teacher_loss": 0.16014347970485687 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.08945631235837936, + "learning_rate": 1.613680506329881e-05, + "loss": 0.161, + "step": 16790, + "teacher_loss": 0.16891761124134064 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.27621936798095703, + "learning_rate": 1.6134540332573665e-05, + "loss": 0.1545, + "step": 16791, + "teacher_loss": 0.14095818996429443 + }, + { + "compression_loss": 0.0, + "epoch": 3.03, + "label_loss": 0.5001845955848694, + "learning_rate": 1.613227557583691e-05, + "loss": 0.2949, + "step": 16792, + "teacher_loss": 0.27207979559898376 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.2931975722312927, + "learning_rate": 1.613001079314047e-05, + "loss": 0.1978, + "step": 16793, + "teacher_loss": 0.18719851970672607 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.584324836730957, + "learning_rate": 1.6127745984536266e-05, + "loss": 0.2103, + "step": 16794, + "teacher_loss": 0.16873955726623535 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.34020107984542847, + "learning_rate": 1.6125481150076232e-05, + "loss": 0.242, + "step": 16795, + "teacher_loss": 0.23103654384613037 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.161312997341156, + "learning_rate": 1.612321628981229e-05, + "loss": 0.1787, + "step": 16796, + "teacher_loss": 0.1806664764881134 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.16806629300117493, + "learning_rate": 1.6120951403796367e-05, + "loss": 0.1304, + "step": 16797, + "teacher_loss": 0.12616702914237976 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.5515435934066772, + "learning_rate": 1.6118686492080386e-05, + "loss": 0.2025, + "step": 16798, + "teacher_loss": 0.16368865966796875 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.3803243637084961, + "learning_rate": 1.6116421554716278e-05, + "loss": 0.2177, + "step": 16799, + "teacher_loss": 0.1996442824602127 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.24220040440559387, + "learning_rate": 1.6114156591755972e-05, + "loss": 0.1706, + "step": 16800, + "teacher_loss": 0.16261771321296692 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.9484876394271851, + "learning_rate": 1.6111891603251396e-05, + "loss": 0.35, + "step": 16801, + "teacher_loss": 0.2835564911365509 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.2501353621482849, + "learning_rate": 1.6109626589254475e-05, + "loss": 0.2175, + "step": 16802, + "teacher_loss": 0.21384716033935547 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.5608738660812378, + "learning_rate": 1.610736154981715e-05, + "loss": 0.2393, + "step": 16803, + "teacher_loss": 0.20356637239456177 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.302284300327301, + "learning_rate": 1.610509648499134e-05, + "loss": 0.1925, + "step": 16804, + "teacher_loss": 0.18029913306236267 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.06422247737646103, + "learning_rate": 1.610283139482898e-05, + "loss": 0.1579, + "step": 16805, + "teacher_loss": 0.1682727336883545 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.4555017948150635, + "learning_rate": 1.6100566279382013e-05, + "loss": 0.2161, + "step": 16806, + "teacher_loss": 0.1895502507686615 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.2842525839805603, + "learning_rate": 1.609830113870235e-05, + "loss": 0.2747, + "step": 16807, + "teacher_loss": 0.27369171380996704 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.4931330680847168, + "learning_rate": 1.6096035972841937e-05, + "loss": 0.2869, + "step": 16808, + "teacher_loss": 0.264007031917572 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.7901811599731445, + "learning_rate": 1.6093770781852708e-05, + "loss": 0.3363, + "step": 16809, + "teacher_loss": 0.28585487604141235 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.569908857345581, + "learning_rate": 1.6091505565786588e-05, + "loss": 0.3198, + "step": 16810, + "teacher_loss": 0.29203182458877563 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.3426416516304016, + "learning_rate": 1.6089240324695526e-05, + "loss": 0.332, + "step": 16811, + "teacher_loss": 0.3308408856391907 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.2697465717792511, + "learning_rate": 1.6086975058631443e-05, + "loss": 0.1465, + "step": 16812, + "teacher_loss": 0.13275158405303955 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.2798083424568176, + "learning_rate": 1.6084709767646285e-05, + "loss": 0.1417, + "step": 16813, + "teacher_loss": 0.1263493001461029 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.26262956857681274, + "learning_rate": 1.6082444451791986e-05, + "loss": 0.1688, + "step": 16814, + "teacher_loss": 0.15836410224437714 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.4059748351573944, + "learning_rate": 1.608017911112047e-05, + "loss": 0.1877, + "step": 16815, + "teacher_loss": 0.16341149806976318 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.4099876880645752, + "learning_rate": 1.6077913745683696e-05, + "loss": 0.2056, + "step": 16816, + "teacher_loss": 0.1828356683254242 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.3752114772796631, + "learning_rate": 1.607564835553359e-05, + "loss": 0.2599, + "step": 16817, + "teacher_loss": 0.24708473682403564 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.702766478061676, + "learning_rate": 1.6073382940722088e-05, + "loss": 0.2528, + "step": 16818, + "teacher_loss": 0.20275646448135376 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.5798918008804321, + "learning_rate": 1.6071117501301138e-05, + "loss": 0.4542, + "step": 16819, + "teacher_loss": 0.44027501344680786 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.35304880142211914, + "learning_rate": 1.606885203732267e-05, + "loss": 0.2081, + "step": 16820, + "teacher_loss": 0.19195863604545593 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.3803020715713501, + "learning_rate": 1.606658654883863e-05, + "loss": 0.2607, + "step": 16821, + "teacher_loss": 0.24745498597621918 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.3758506178855896, + "learning_rate": 1.6064321035900965e-05, + "loss": 0.2387, + "step": 16822, + "teacher_loss": 0.2235088348388672 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.9602616429328918, + "learning_rate": 1.6062055498561607e-05, + "loss": 0.7645, + "step": 16823, + "teacher_loss": 0.7427017688751221 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.6457028388977051, + "learning_rate": 1.6059789936872495e-05, + "loss": 0.4047, + "step": 16824, + "teacher_loss": 0.37791305780410767 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.2610934376716614, + "learning_rate": 1.6057524350885583e-05, + "loss": 0.2145, + "step": 16825, + "teacher_loss": 0.20931777358055115 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.5805801749229431, + "learning_rate": 1.6055258740652806e-05, + "loss": 0.2681, + "step": 16826, + "teacher_loss": 0.2334115207195282 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.2725808620452881, + "learning_rate": 1.6052993106226105e-05, + "loss": 0.4213, + "step": 16827, + "teacher_loss": 0.4378345012664795 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.3599764108657837, + "learning_rate": 1.6050727447657437e-05, + "loss": 0.2117, + "step": 16828, + "teacher_loss": 0.19525441527366638 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.4869891405105591, + "learning_rate": 1.6048461764998735e-05, + "loss": 0.2003, + "step": 16829, + "teacher_loss": 0.1684727966785431 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.5623921155929565, + "learning_rate": 1.6046196058301953e-05, + "loss": 0.3062, + "step": 16830, + "teacher_loss": 0.27769070863723755 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.4406111240386963, + "learning_rate": 1.6043930327619028e-05, + "loss": 0.2307, + "step": 16831, + "teacher_loss": 0.20736992359161377 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.2761671245098114, + "learning_rate": 1.604166457300191e-05, + "loss": 0.1625, + "step": 16832, + "teacher_loss": 0.1498270034790039 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.4341892600059509, + "learning_rate": 1.6039398794502548e-05, + "loss": 0.1562, + "step": 16833, + "teacher_loss": 0.1253231167793274 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.6963376998901367, + "learning_rate": 1.6037132992172887e-05, + "loss": 0.265, + "step": 16834, + "teacher_loss": 0.21707329154014587 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.7535719871520996, + "learning_rate": 1.6034867166064873e-05, + "loss": 0.2651, + "step": 16835, + "teacher_loss": 0.21079209446907043 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.45142465829849243, + "learning_rate": 1.6032601316230466e-05, + "loss": 0.2751, + "step": 16836, + "teacher_loss": 0.2554752230644226 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.31602269411087036, + "learning_rate": 1.6030335442721598e-05, + "loss": 0.1912, + "step": 16837, + "teacher_loss": 0.17727622389793396 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.8295608758926392, + "learning_rate": 1.602806954559023e-05, + "loss": 0.2234, + "step": 16838, + "teacher_loss": 0.15609216690063477 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.440433144569397, + "learning_rate": 1.602580362488831e-05, + "loss": 0.216, + "step": 16839, + "teacher_loss": 0.19102974236011505 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.6695796251296997, + "learning_rate": 1.6023537680667787e-05, + "loss": 0.5044, + "step": 16840, + "teacher_loss": 0.48600631952285767 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.6110231280326843, + "learning_rate": 1.602127171298062e-05, + "loss": 0.2144, + "step": 16841, + "teacher_loss": 0.1703658401966095 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.3728519678115845, + "learning_rate": 1.6019005721878748e-05, + "loss": 0.2577, + "step": 16842, + "teacher_loss": 0.24487125873565674 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.26037514209747314, + "learning_rate": 1.6016739707414134e-05, + "loss": 0.2101, + "step": 16843, + "teacher_loss": 0.2045416235923767 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.5497496128082275, + "learning_rate": 1.6014473669638725e-05, + "loss": 0.2606, + "step": 16844, + "teacher_loss": 0.2284727692604065 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.4091755151748657, + "learning_rate": 1.6012207608604473e-05, + "loss": 0.2539, + "step": 16845, + "teacher_loss": 0.23665225505828857 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.4850279986858368, + "learning_rate": 1.6009941524363338e-05, + "loss": 0.2828, + "step": 16846, + "teacher_loss": 0.2603681981563568 + }, + { + "compression_loss": 0.0, + "epoch": 3.04, + "label_loss": 0.36309248208999634, + "learning_rate": 1.6007675416967273e-05, + "loss": 0.2457, + "step": 16847, + "teacher_loss": 0.232618510723114 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.6456424593925476, + "learning_rate": 1.600540928646823e-05, + "loss": 0.5705, + "step": 16848, + "teacher_loss": 0.5621359944343567 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.5721041560173035, + "learning_rate": 1.6003143132918172e-05, + "loss": 0.3132, + "step": 16849, + "teacher_loss": 0.2844192385673523 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.6573474407196045, + "learning_rate": 1.6000876956369043e-05, + "loss": 0.3054, + "step": 16850, + "teacher_loss": 0.26624369621276855 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.31568092107772827, + "learning_rate": 1.599861075687281e-05, + "loss": 0.2458, + "step": 16851, + "teacher_loss": 0.2380181849002838 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.4519786834716797, + "learning_rate": 1.5996344534481427e-05, + "loss": 0.1766, + "step": 16852, + "teacher_loss": 0.14605773985385895 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.46846020221710205, + "learning_rate": 1.5994078289246843e-05, + "loss": 0.189, + "step": 16853, + "teacher_loss": 0.15791219472885132 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.23215147852897644, + "learning_rate": 1.5991812021221033e-05, + "loss": 0.2055, + "step": 16854, + "teacher_loss": 0.20257382094860077 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.4606871008872986, + "learning_rate": 1.5989545730455945e-05, + "loss": 0.3178, + "step": 16855, + "teacher_loss": 0.30187657475471497 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.4988008141517639, + "learning_rate": 1.5987279417003537e-05, + "loss": 0.2922, + "step": 16856, + "teacher_loss": 0.2692581117153168 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.16587047278881073, + "learning_rate": 1.5985013080915772e-05, + "loss": 0.156, + "step": 16857, + "teacher_loss": 0.1548621654510498 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.3116499185562134, + "learning_rate": 1.5982746722244612e-05, + "loss": 0.1833, + "step": 16858, + "teacher_loss": 0.1689913421869278 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.7125014662742615, + "learning_rate": 1.5980480341042017e-05, + "loss": 0.2575, + "step": 16859, + "teacher_loss": 0.2069072276353836 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.5630989074707031, + "learning_rate": 1.5978213937359946e-05, + "loss": 0.3625, + "step": 16860, + "teacher_loss": 0.3402237594127655 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.6866254806518555, + "learning_rate": 1.5975947511250367e-05, + "loss": 0.292, + "step": 16861, + "teacher_loss": 0.24809899926185608 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.5564385056495667, + "learning_rate": 1.597368106276523e-05, + "loss": 0.1946, + "step": 16862, + "teacher_loss": 0.15442153811454773 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.3809937834739685, + "learning_rate": 1.597141459195651e-05, + "loss": 0.2144, + "step": 16863, + "teacher_loss": 0.19593197107315063 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.2710877060890198, + "learning_rate": 1.5969148098876166e-05, + "loss": 0.2463, + "step": 16864, + "teacher_loss": 0.24349215626716614 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.3203345537185669, + "learning_rate": 1.596688158357616e-05, + "loss": 0.208, + "step": 16865, + "teacher_loss": 0.19549402594566345 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.44225579500198364, + "learning_rate": 1.596461504610846e-05, + "loss": 0.3731, + "step": 16866, + "teacher_loss": 0.36539965867996216 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.4427652955055237, + "learning_rate": 1.5962348486525028e-05, + "loss": 0.2447, + "step": 16867, + "teacher_loss": 0.22273138165473938 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 1.1535390615463257, + "learning_rate": 1.596008190487783e-05, + "loss": 0.2724, + "step": 16868, + "teacher_loss": 0.17445990443229675 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.23975099623203278, + "learning_rate": 1.5957815301218834e-05, + "loss": 0.3467, + "step": 16869, + "teacher_loss": 0.35854339599609375 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.32493311166763306, + "learning_rate": 1.59555486756e-05, + "loss": 0.2069, + "step": 16870, + "teacher_loss": 0.19383135437965393 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.27010229229927063, + "learning_rate": 1.5953282028073307e-05, + "loss": 0.1967, + "step": 16871, + "teacher_loss": 0.1884966790676117 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.23377005755901337, + "learning_rate": 1.5951015358690712e-05, + "loss": 0.1878, + "step": 16872, + "teacher_loss": 0.18272638320922852 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.3354255259037018, + "learning_rate": 1.5948748667504182e-05, + "loss": 0.2218, + "step": 16873, + "teacher_loss": 0.2092185914516449 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.6702821254730225, + "learning_rate": 1.5946481954565696e-05, + "loss": 0.2524, + "step": 16874, + "teacher_loss": 0.20599710941314697 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.6687851548194885, + "learning_rate": 1.5944215219927212e-05, + "loss": 0.4103, + "step": 16875, + "teacher_loss": 0.3815535604953766 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.6331831216812134, + "learning_rate": 1.5941948463640708e-05, + "loss": 0.2273, + "step": 16876, + "teacher_loss": 0.18219245970249176 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.18871107697486877, + "learning_rate": 1.5939681685758146e-05, + "loss": 0.1874, + "step": 16877, + "teacher_loss": 0.18720856308937073 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.13560549914836884, + "learning_rate": 1.59374148863315e-05, + "loss": 0.1569, + "step": 16878, + "teacher_loss": 0.1592504382133484 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.26225972175598145, + "learning_rate": 1.5935148065412743e-05, + "loss": 0.1709, + "step": 16879, + "teacher_loss": 0.16069883108139038 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.6609208583831787, + "learning_rate": 1.5932881223053847e-05, + "loss": 0.2505, + "step": 16880, + "teacher_loss": 0.20492485165596008 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.3135026693344116, + "learning_rate": 1.593061435930678e-05, + "loss": 0.2494, + "step": 16881, + "teacher_loss": 0.24232399463653564 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.36897093057632446, + "learning_rate": 1.592834747422351e-05, + "loss": 0.254, + "step": 16882, + "teacher_loss": 0.2412070631980896 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.5133674144744873, + "learning_rate": 1.5926080567856023e-05, + "loss": 0.268, + "step": 16883, + "teacher_loss": 0.24070867896080017 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.1623694896697998, + "learning_rate": 1.592381364025628e-05, + "loss": 0.1423, + "step": 16884, + "teacher_loss": 0.14011451601982117 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.3547600507736206, + "learning_rate": 1.5921546691476264e-05, + "loss": 0.207, + "step": 16885, + "teacher_loss": 0.19056382775306702 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.3432597517967224, + "learning_rate": 1.5919279721567948e-05, + "loss": 0.2675, + "step": 16886, + "teacher_loss": 0.25912314653396606 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.33007028698921204, + "learning_rate": 1.5917012730583298e-05, + "loss": 0.1806, + "step": 16887, + "teacher_loss": 0.1640370786190033 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.37300965189933777, + "learning_rate": 1.5914745718574297e-05, + "loss": 0.2466, + "step": 16888, + "teacher_loss": 0.2325739860534668 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.2121027410030365, + "learning_rate": 1.591247868559292e-05, + "loss": 0.2052, + "step": 16889, + "teacher_loss": 0.20447131991386414 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.20480406284332275, + "learning_rate": 1.591021163169114e-05, + "loss": 0.1459, + "step": 16890, + "teacher_loss": 0.13940449059009552 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.35742485523223877, + "learning_rate": 1.590794455692094e-05, + "loss": 0.2103, + "step": 16891, + "teacher_loss": 0.19391420483589172 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.41289693117141724, + "learning_rate": 1.5905677461334292e-05, + "loss": 0.3053, + "step": 16892, + "teacher_loss": 0.29330432415008545 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.41480934619903564, + "learning_rate": 1.5903410344983175e-05, + "loss": 0.1612, + "step": 16893, + "teacher_loss": 0.1330462545156479 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.20442867279052734, + "learning_rate": 1.590114320791957e-05, + "loss": 0.2163, + "step": 16894, + "teacher_loss": 0.21761064231395721 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.4068581461906433, + "learning_rate": 1.589887605019545e-05, + "loss": 0.3528, + "step": 16895, + "teacher_loss": 0.34677520394325256 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.5025511384010315, + "learning_rate": 1.5896608871862798e-05, + "loss": 0.2409, + "step": 16896, + "teacher_loss": 0.21177448332309723 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.36766624450683594, + "learning_rate": 1.589434167297359e-05, + "loss": 0.1621, + "step": 16897, + "teacher_loss": 0.13930313289165497 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.29141315817832947, + "learning_rate": 1.589207445357981e-05, + "loss": 0.2332, + "step": 16898, + "teacher_loss": 0.2267286479473114 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.35825392603874207, + "learning_rate": 1.588980721373344e-05, + "loss": 0.2524, + "step": 16899, + "teacher_loss": 0.2406141757965088 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.7143042087554932, + "learning_rate": 1.5887539953486456e-05, + "loss": 0.292, + "step": 16900, + "teacher_loss": 0.24505400657653809 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.8673523664474487, + "learning_rate": 1.5885272672890842e-05, + "loss": 0.342, + "step": 16901, + "teacher_loss": 0.28361114859580994 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.7867000102996826, + "learning_rate": 1.5883005371998582e-05, + "loss": 0.3843, + "step": 16902, + "teacher_loss": 0.3395382761955261 + }, + { + "compression_loss": 0.0, + "epoch": 3.05, + "label_loss": 0.8298860788345337, + "learning_rate": 1.5880738050861654e-05, + "loss": 0.3379, + "step": 16903, + "teacher_loss": 0.28318527340888977 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.3373580276966095, + "learning_rate": 1.5878470709532044e-05, + "loss": 0.1931, + "step": 16904, + "teacher_loss": 0.1770373284816742 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.30741333961486816, + "learning_rate": 1.5876203348061732e-05, + "loss": 0.2574, + "step": 16905, + "teacher_loss": 0.2518293261528015 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.3876432776451111, + "learning_rate": 1.587393596650271e-05, + "loss": 0.1968, + "step": 16906, + "teacher_loss": 0.17564880847930908 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.36496394872665405, + "learning_rate": 1.5871668564906955e-05, + "loss": 0.2015, + "step": 16907, + "teacher_loss": 0.18335093557834625 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.5074715614318848, + "learning_rate": 1.586940114332645e-05, + "loss": 0.23, + "step": 16908, + "teacher_loss": 0.19916322827339172 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.20610252022743225, + "learning_rate": 1.5867133701813183e-05, + "loss": 0.1502, + "step": 16909, + "teacher_loss": 0.14400246739387512 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.2280387282371521, + "learning_rate": 1.586486624041914e-05, + "loss": 0.1973, + "step": 16910, + "teacher_loss": 0.1938643902540207 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.08964599668979645, + "learning_rate": 1.586259875919631e-05, + "loss": 0.1632, + "step": 16911, + "teacher_loss": 0.17136666178703308 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.43679535388946533, + "learning_rate": 1.586033125819668e-05, + "loss": 0.2046, + "step": 16912, + "teacher_loss": 0.17881129682064056 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.14137499034404755, + "learning_rate": 1.5858063737472222e-05, + "loss": 0.1481, + "step": 16913, + "teacher_loss": 0.1488851010799408 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.5175946354866028, + "learning_rate": 1.585579619707494e-05, + "loss": 0.2593, + "step": 16914, + "teacher_loss": 0.2306080162525177 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.2763390839099884, + "learning_rate": 1.5853528637056827e-05, + "loss": 0.2868, + "step": 16915, + "teacher_loss": 0.2879959046840668 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.2820940911769867, + "learning_rate": 1.5851261057469852e-05, + "loss": 0.216, + "step": 16916, + "teacher_loss": 0.2086283564567566 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.373432993888855, + "learning_rate": 1.5848993458366012e-05, + "loss": 0.2451, + "step": 16917, + "teacher_loss": 0.23082809150218964 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.2175893634557724, + "learning_rate": 1.58467258397973e-05, + "loss": 0.25, + "step": 16918, + "teacher_loss": 0.2535994350910187 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.49444180727005005, + "learning_rate": 1.5844458201815702e-05, + "loss": 0.2795, + "step": 16919, + "teacher_loss": 0.25562554597854614 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.13361681997776031, + "learning_rate": 1.584219054447321e-05, + "loss": 0.1913, + "step": 16920, + "teacher_loss": 0.19768434762954712 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.5936633348464966, + "learning_rate": 1.583992286782182e-05, + "loss": 0.2298, + "step": 16921, + "teacher_loss": 0.1893216073513031 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.49842774868011475, + "learning_rate": 1.5837655171913508e-05, + "loss": 0.3824, + "step": 16922, + "teacher_loss": 0.36948540806770325 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.5964106321334839, + "learning_rate": 1.583538745680028e-05, + "loss": 0.285, + "step": 16923, + "teacher_loss": 0.25036680698394775 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.3094998300075531, + "learning_rate": 1.5833119722534118e-05, + "loss": 0.2301, + "step": 16924, + "teacher_loss": 0.22126713395118713 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.384113609790802, + "learning_rate": 1.5830851969167018e-05, + "loss": 0.1884, + "step": 16925, + "teacher_loss": 0.16669228672981262 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.45244431495666504, + "learning_rate": 1.5828584196750977e-05, + "loss": 0.3083, + "step": 16926, + "teacher_loss": 0.2922462224960327 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.47019338607788086, + "learning_rate": 1.5826316405337983e-05, + "loss": 0.2526, + "step": 16927, + "teacher_loss": 0.2283751666545868 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.27009373903274536, + "learning_rate": 1.582404859498003e-05, + "loss": 0.2023, + "step": 16928, + "teacher_loss": 0.19472911953926086 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.4683631956577301, + "learning_rate": 1.5821780765729118e-05, + "loss": 0.2965, + "step": 16929, + "teacher_loss": 0.2773500084877014 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.9500136375427246, + "learning_rate": 1.581951291763723e-05, + "loss": 0.6972, + "step": 16930, + "teacher_loss": 0.6691627502441406 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.22023798525333405, + "learning_rate": 1.5817245050756374e-05, + "loss": 0.195, + "step": 16931, + "teacher_loss": 0.19219177961349487 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.4518231749534607, + "learning_rate": 1.5814977165138537e-05, + "loss": 0.226, + "step": 16932, + "teacher_loss": 0.20095695555210114 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.3171052634716034, + "learning_rate": 1.5812709260835715e-05, + "loss": 0.2211, + "step": 16933, + "teacher_loss": 0.2104271799325943 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.6728929877281189, + "learning_rate": 1.581044133789991e-05, + "loss": 0.3857, + "step": 16934, + "teacher_loss": 0.3538045883178711 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.20353052020072937, + "learning_rate": 1.5808173396383113e-05, + "loss": 0.1571, + "step": 16935, + "teacher_loss": 0.15188682079315186 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.6556011438369751, + "learning_rate": 1.580590543633732e-05, + "loss": 0.2818, + "step": 16936, + "teacher_loss": 0.24026557803153992 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.31891047954559326, + "learning_rate": 1.580363745781454e-05, + "loss": 0.2147, + "step": 16937, + "teacher_loss": 0.2030760794878006 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.6376428008079529, + "learning_rate": 1.5801369460866753e-05, + "loss": 0.248, + "step": 16938, + "teacher_loss": 0.20466148853302002 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.37075769901275635, + "learning_rate": 1.5799101445545973e-05, + "loss": 0.1963, + "step": 16939, + "teacher_loss": 0.17692790925502777 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.7148482799530029, + "learning_rate": 1.5796833411904192e-05, + "loss": 0.2267, + "step": 16940, + "teacher_loss": 0.17249146103858948 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.9486251473426819, + "learning_rate": 1.5794565359993408e-05, + "loss": 0.3398, + "step": 16941, + "teacher_loss": 0.27218031883239746 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.35436415672302246, + "learning_rate": 1.5792297289865618e-05, + "loss": 0.2208, + "step": 16942, + "teacher_loss": 0.20599046349525452 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.301593542098999, + "learning_rate": 1.5790029201572834e-05, + "loss": 0.2045, + "step": 16943, + "teacher_loss": 0.19374148547649384 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.48216670751571655, + "learning_rate": 1.5787761095167047e-05, + "loss": 0.2561, + "step": 16944, + "teacher_loss": 0.23099717497825623 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.3535589873790741, + "learning_rate": 1.578549297070026e-05, + "loss": 0.278, + "step": 16945, + "teacher_loss": 0.26956436038017273 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.39084386825561523, + "learning_rate": 1.5783224828224476e-05, + "loss": 0.239, + "step": 16946, + "teacher_loss": 0.22210997343063354 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.19451268017292023, + "learning_rate": 1.578095666779169e-05, + "loss": 0.1521, + "step": 16947, + "teacher_loss": 0.14734452962875366 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.5897947549819946, + "learning_rate": 1.5778688489453912e-05, + "loss": 0.3652, + "step": 16948, + "teacher_loss": 0.3402193784713745 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.8268476724624634, + "learning_rate": 1.5776420293263144e-05, + "loss": 0.4349, + "step": 16949, + "teacher_loss": 0.39138779044151306 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.5780760645866394, + "learning_rate": 1.5774152079271382e-05, + "loss": 0.3589, + "step": 16950, + "teacher_loss": 0.33452218770980835 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.31508561968803406, + "learning_rate": 1.5771883847530637e-05, + "loss": 0.1979, + "step": 16951, + "teacher_loss": 0.18491590023040771 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.39863935112953186, + "learning_rate": 1.5769615598092908e-05, + "loss": 0.226, + "step": 16952, + "teacher_loss": 0.20682290196418762 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.34072333574295044, + "learning_rate": 1.5767347331010202e-05, + "loss": 0.2546, + "step": 16953, + "teacher_loss": 0.2449931502342224 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.414497435092926, + "learning_rate": 1.576507904633452e-05, + "loss": 0.2762, + "step": 16954, + "teacher_loss": 0.2608224153518677 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.9077692627906799, + "learning_rate": 1.576281074411787e-05, + "loss": 0.2855, + "step": 16955, + "teacher_loss": 0.216361865401268 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.36713796854019165, + "learning_rate": 1.5760542424412256e-05, + "loss": 0.2001, + "step": 16956, + "teacher_loss": 0.1815125048160553 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.3706607520580292, + "learning_rate": 1.5758274087269686e-05, + "loss": 0.3722, + "step": 16957, + "teacher_loss": 0.37235724925994873 + }, + { + "compression_loss": 0.0, + "epoch": 3.06, + "label_loss": 0.3271789848804474, + "learning_rate": 1.5756005732742165e-05, + "loss": 0.1859, + "step": 16958, + "teacher_loss": 0.17024821043014526 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.3409363925457001, + "learning_rate": 1.57537373608817e-05, + "loss": 0.2063, + "step": 16959, + "teacher_loss": 0.19136835634708405 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.4970812201499939, + "learning_rate": 1.5751468971740296e-05, + "loss": 0.2739, + "step": 16960, + "teacher_loss": 0.24905620515346527 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.6212697625160217, + "learning_rate": 1.574920056536996e-05, + "loss": 0.2069, + "step": 16961, + "teacher_loss": 0.1608992964029312 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.2803244888782501, + "learning_rate": 1.5746932141822705e-05, + "loss": 0.1998, + "step": 16962, + "teacher_loss": 0.190854012966156 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.5244549512863159, + "learning_rate": 1.574466370115053e-05, + "loss": 0.2889, + "step": 16963, + "teacher_loss": 0.2626722753047943 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.42564713954925537, + "learning_rate": 1.5742395243405458e-05, + "loss": 0.1807, + "step": 16964, + "teacher_loss": 0.15346242487430573 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.9706597924232483, + "learning_rate": 1.574012676863948e-05, + "loss": 0.5769, + "step": 16965, + "teacher_loss": 0.5330967903137207 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.5636691451072693, + "learning_rate": 1.5737858276904617e-05, + "loss": 0.2442, + "step": 16966, + "teacher_loss": 0.20874431729316711 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.75600266456604, + "learning_rate": 1.5735589768252875e-05, + "loss": 0.356, + "step": 16967, + "teacher_loss": 0.31152307987213135 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.6100039482116699, + "learning_rate": 1.5733321242736263e-05, + "loss": 0.3166, + "step": 16968, + "teacher_loss": 0.2839508354663849 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.5439411401748657, + "learning_rate": 1.57310527004068e-05, + "loss": 0.2952, + "step": 16969, + "teacher_loss": 0.2675391435623169 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.3686427175998688, + "learning_rate": 1.5728784141316487e-05, + "loss": 0.2198, + "step": 16970, + "teacher_loss": 0.2032998651266098 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.3578403890132904, + "learning_rate": 1.572651556551734e-05, + "loss": 0.1955, + "step": 16971, + "teacher_loss": 0.17742550373077393 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.22191378474235535, + "learning_rate": 1.5724246973061363e-05, + "loss": 0.222, + "step": 16972, + "teacher_loss": 0.22204521298408508 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.18693555891513824, + "learning_rate": 1.5721978364000577e-05, + "loss": 0.1387, + "step": 16973, + "teacher_loss": 0.13335445523262024 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.5246123671531677, + "learning_rate": 1.5719709738386995e-05, + "loss": 0.2645, + "step": 16974, + "teacher_loss": 0.23556794226169586 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.778317391872406, + "learning_rate": 1.5717441096272628e-05, + "loss": 0.2085, + "step": 16975, + "teacher_loss": 0.1452065408229828 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.45208150148391724, + "learning_rate": 1.571517243770948e-05, + "loss": 0.3653, + "step": 16976, + "teacher_loss": 0.3556939363479614 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.3655804991722107, + "learning_rate": 1.571290376274958e-05, + "loss": 0.2238, + "step": 16977, + "teacher_loss": 0.2080153226852417 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.6207676529884338, + "learning_rate": 1.571063507144493e-05, + "loss": 0.3204, + "step": 16978, + "teacher_loss": 0.2869967222213745 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.357609361410141, + "learning_rate": 1.5708366363847553e-05, + "loss": 0.2033, + "step": 16979, + "teacher_loss": 0.18611447513103485 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.5792220830917358, + "learning_rate": 1.5706097640009452e-05, + "loss": 0.2432, + "step": 16980, + "teacher_loss": 0.2059001624584198 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.2850901782512665, + "learning_rate": 1.5703828899982654e-05, + "loss": 0.2178, + "step": 16981, + "teacher_loss": 0.21033713221549988 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.4918980896472931, + "learning_rate": 1.5701560143819167e-05, + "loss": 0.2231, + "step": 16982, + "teacher_loss": 0.1932334303855896 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.2205010950565338, + "learning_rate": 1.5699291371571012e-05, + "loss": 0.2158, + "step": 16983, + "teacher_loss": 0.21523047983646393 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.30096447467803955, + "learning_rate": 1.5697022583290204e-05, + "loss": 0.1976, + "step": 16984, + "teacher_loss": 0.18609781563282013 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.6012523174285889, + "learning_rate": 1.5694753779028754e-05, + "loss": 0.3176, + "step": 16985, + "teacher_loss": 0.28607243299484253 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.41279304027557373, + "learning_rate": 1.5692484958838683e-05, + "loss": 0.237, + "step": 16986, + "teacher_loss": 0.21743261814117432 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.16249369084835052, + "learning_rate": 1.569021612277201e-05, + "loss": 0.1375, + "step": 16987, + "teacher_loss": 0.13474775850772858 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 1.0769838094711304, + "learning_rate": 1.5687947270880748e-05, + "loss": 0.3405, + "step": 16988, + "teacher_loss": 0.258626252412796 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.4901203513145447, + "learning_rate": 1.568567840321692e-05, + "loss": 0.2161, + "step": 16989, + "teacher_loss": 0.18562708795070648 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.6647986769676208, + "learning_rate": 1.568340951983254e-05, + "loss": 0.2612, + "step": 16990, + "teacher_loss": 0.21638908982276917 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.3793599605560303, + "learning_rate": 1.5681140620779633e-05, + "loss": 0.2154, + "step": 16991, + "teacher_loss": 0.1972190886735916 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.3759060502052307, + "learning_rate": 1.567887170611021e-05, + "loss": 0.2502, + "step": 16992, + "teacher_loss": 0.23621763288974762 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.19875489175319672, + "learning_rate": 1.567660277587629e-05, + "loss": 0.2139, + "step": 16993, + "teacher_loss": 0.21553188562393188 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.5348937511444092, + "learning_rate": 1.5674333830129907e-05, + "loss": 0.2496, + "step": 16994, + "teacher_loss": 0.21786893904209137 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.30575063824653625, + "learning_rate": 1.5672064868923064e-05, + "loss": 0.1935, + "step": 16995, + "teacher_loss": 0.18104791641235352 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.3474254012107849, + "learning_rate": 1.566979589230779e-05, + "loss": 0.2716, + "step": 16996, + "teacher_loss": 0.26320019364356995 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.21591754257678986, + "learning_rate": 1.5667526900336107e-05, + "loss": 0.153, + "step": 16997, + "teacher_loss": 0.146036297082901 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.7553324699401855, + "learning_rate": 1.5665257893060026e-05, + "loss": 0.3688, + "step": 16998, + "teacher_loss": 0.32585006952285767 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.3161123991012573, + "learning_rate": 1.5662988870531585e-05, + "loss": 0.2839, + "step": 16999, + "teacher_loss": 0.28030043840408325 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.6127636432647705, + "learning_rate": 1.5660719832802793e-05, + "loss": 0.2526, + "step": 17000, + "teacher_loss": 0.21252989768981934 + }, + { + "epoch": 3.07, + "eval_exact_match": 79.84862819299906, + "eval_f1": 87.49352348954031, + "step": 17000 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.2902565002441406, + "learning_rate": 1.5658450779925674e-05, + "loss": 0.2521, + "step": 17001, + "teacher_loss": 0.24788770079612732 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.46691271662712097, + "learning_rate": 1.5656181711952255e-05, + "loss": 0.2204, + "step": 17002, + "teacher_loss": 0.19297060370445251 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.1869509518146515, + "learning_rate": 1.565391262893456e-05, + "loss": 0.2104, + "step": 17003, + "teacher_loss": 0.2129782885313034 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.23464754223823547, + "learning_rate": 1.5651643530924604e-05, + "loss": 0.2597, + "step": 17004, + "teacher_loss": 0.2625196874141693 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.523677408695221, + "learning_rate": 1.5649374417974417e-05, + "loss": 0.3193, + "step": 17005, + "teacher_loss": 0.2965613305568695 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.10576523840427399, + "learning_rate": 1.564710529013602e-05, + "loss": 0.1803, + "step": 17006, + "teacher_loss": 0.1885388195514679 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.21308773756027222, + "learning_rate": 1.564483614746144e-05, + "loss": 0.1942, + "step": 17007, + "teacher_loss": 0.19205442070960999 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.30874884128570557, + "learning_rate": 1.5642566990002705e-05, + "loss": 0.2286, + "step": 17008, + "teacher_loss": 0.21973197162151337 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.16348449885845184, + "learning_rate": 1.564029781781183e-05, + "loss": 0.2655, + "step": 17009, + "teacher_loss": 0.27687662839889526 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.36447077989578247, + "learning_rate": 1.5638028630940844e-05, + "loss": 0.1744, + "step": 17010, + "teacher_loss": 0.1532387137413025 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.46360915899276733, + "learning_rate": 1.563575942944178e-05, + "loss": 0.286, + "step": 17011, + "teacher_loss": 0.26626139879226685 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.32487574219703674, + "learning_rate": 1.5633490213366655e-05, + "loss": 0.2146, + "step": 17012, + "teacher_loss": 0.20231729745864868 + }, + { + "compression_loss": 0.0, + "epoch": 3.07, + "label_loss": 0.44872552156448364, + "learning_rate": 1.56312209827675e-05, + "loss": 0.2162, + "step": 17013, + "teacher_loss": 0.19032371044158936 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.37449783086776733, + "learning_rate": 1.5628951737696344e-05, + "loss": 0.2208, + "step": 17014, + "teacher_loss": 0.2037086933851242 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.29759496450424194, + "learning_rate": 1.5626682478205205e-05, + "loss": 0.1974, + "step": 17015, + "teacher_loss": 0.18624627590179443 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.651835560798645, + "learning_rate": 1.5624413204346114e-05, + "loss": 0.2663, + "step": 17016, + "teacher_loss": 0.22347280383110046 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.6556954979896545, + "learning_rate": 1.5622143916171105e-05, + "loss": 0.2756, + "step": 17017, + "teacher_loss": 0.23331734538078308 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.8042777180671692, + "learning_rate": 1.5619874613732198e-05, + "loss": 0.3854, + "step": 17018, + "teacher_loss": 0.33889976143836975 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.20930439233779907, + "learning_rate": 1.561760529708143e-05, + "loss": 0.1795, + "step": 17019, + "teacher_loss": 0.17623579502105713 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 1.0444622039794922, + "learning_rate": 1.561533596627082e-05, + "loss": 0.3384, + "step": 17020, + "teacher_loss": 0.25996947288513184 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.4599539041519165, + "learning_rate": 1.5613066621352397e-05, + "loss": 0.2829, + "step": 17021, + "teacher_loss": 0.26327618956565857 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.32906782627105713, + "learning_rate": 1.56107972623782e-05, + "loss": 0.19, + "step": 17022, + "teacher_loss": 0.17449253797531128 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.8386813402175903, + "learning_rate": 1.5608527889400253e-05, + "loss": 0.3193, + "step": 17023, + "teacher_loss": 0.2616077661514282 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.4148062467575073, + "learning_rate": 1.5606258502470585e-05, + "loss": 0.2496, + "step": 17024, + "teacher_loss": 0.23129773139953613 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.33648091554641724, + "learning_rate": 1.5603989101641228e-05, + "loss": 0.252, + "step": 17025, + "teacher_loss": 0.24256299436092377 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.6624933481216431, + "learning_rate": 1.560171968696421e-05, + "loss": 0.2565, + "step": 17026, + "teacher_loss": 0.2113877832889557 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.26048383116722107, + "learning_rate": 1.5599450258491567e-05, + "loss": 0.2651, + "step": 17027, + "teacher_loss": 0.2656324803829193 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.5426404476165771, + "learning_rate": 1.5597180816275323e-05, + "loss": 0.3756, + "step": 17028, + "teacher_loss": 0.357077032327652 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.24016955494880676, + "learning_rate": 1.5594911360367513e-05, + "loss": 0.227, + "step": 17029, + "teacher_loss": 0.2255600243806839 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.7102572917938232, + "learning_rate": 1.5592641890820176e-05, + "loss": 0.2563, + "step": 17030, + "teacher_loss": 0.20590931177139282 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.2590523362159729, + "learning_rate": 1.559037240768533e-05, + "loss": 0.1834, + "step": 17031, + "teacher_loss": 0.17494651675224304 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.5802199840545654, + "learning_rate": 1.5588102911015014e-05, + "loss": 0.3337, + "step": 17032, + "teacher_loss": 0.30636170506477356 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.4627974033355713, + "learning_rate": 1.558583340086127e-05, + "loss": 0.197, + "step": 17033, + "teacher_loss": 0.16745418310165405 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.5122882723808289, + "learning_rate": 1.5583563877276116e-05, + "loss": 0.1902, + "step": 17034, + "teacher_loss": 0.15436051785945892 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.13830158114433289, + "learning_rate": 1.558129434031159e-05, + "loss": 0.1796, + "step": 17035, + "teacher_loss": 0.184239000082016 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.2738603353500366, + "learning_rate": 1.557902479001973e-05, + "loss": 0.161, + "step": 17036, + "teacher_loss": 0.1485133171081543 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.7040095329284668, + "learning_rate": 1.5576755226452568e-05, + "loss": 0.2813, + "step": 17037, + "teacher_loss": 0.23428919911384583 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.6015704274177551, + "learning_rate": 1.5574485649662138e-05, + "loss": 0.3524, + "step": 17038, + "teacher_loss": 0.32473304867744446 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.2029210329055786, + "learning_rate": 1.5572216059700478e-05, + "loss": 0.175, + "step": 17039, + "teacher_loss": 0.1718527227640152 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.7049412131309509, + "learning_rate": 1.5569946456619613e-05, + "loss": 0.4409, + "step": 17040, + "teacher_loss": 0.41151756048202515 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 1.1779247522354126, + "learning_rate": 1.5567676840471587e-05, + "loss": 0.3635, + "step": 17041, + "teacher_loss": 0.2730620205402374 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.5995557308197021, + "learning_rate": 1.5565407211308436e-05, + "loss": 0.2942, + "step": 17042, + "teacher_loss": 0.2602502703666687 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.17783525586128235, + "learning_rate": 1.5563137569182184e-05, + "loss": 0.1801, + "step": 17043, + "teacher_loss": 0.18038839101791382 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.39081358909606934, + "learning_rate": 1.5560867914144887e-05, + "loss": 0.2799, + "step": 17044, + "teacher_loss": 0.26760998368263245 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.14298290014266968, + "learning_rate": 1.5558598246248563e-05, + "loss": 0.1786, + "step": 17045, + "teacher_loss": 0.18253114819526672 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.5367259979248047, + "learning_rate": 1.5556328565545256e-05, + "loss": 0.2698, + "step": 17046, + "teacher_loss": 0.24019497632980347 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.40469419956207275, + "learning_rate": 1.555405887208701e-05, + "loss": 0.2318, + "step": 17047, + "teacher_loss": 0.2126368284225464 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.8280733227729797, + "learning_rate": 1.5551789165925845e-05, + "loss": 0.4051, + "step": 17048, + "teacher_loss": 0.35811880230903625 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.4851178526878357, + "learning_rate": 1.5549519447113815e-05, + "loss": 0.1964, + "step": 17049, + "teacher_loss": 0.16433373093605042 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.4729152023792267, + "learning_rate": 1.5547249715702952e-05, + "loss": 0.3231, + "step": 17050, + "teacher_loss": 0.30639877915382385 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.301393985748291, + "learning_rate": 1.554497997174529e-05, + "loss": 0.2618, + "step": 17051, + "teacher_loss": 0.2573610544204712 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.5312014222145081, + "learning_rate": 1.5542710215292882e-05, + "loss": 0.2637, + "step": 17052, + "teacher_loss": 0.2339445799589157 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.25262972712516785, + "learning_rate": 1.5540440446397745e-05, + "loss": 0.1765, + "step": 17053, + "teacher_loss": 0.16803491115570068 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.6839818954467773, + "learning_rate": 1.553817066511193e-05, + "loss": 0.4102, + "step": 17054, + "teacher_loss": 0.3798198699951172 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.266541987657547, + "learning_rate": 1.553590087148748e-05, + "loss": 0.2689, + "step": 17055, + "teacher_loss": 0.26913249492645264 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.5451951026916504, + "learning_rate": 1.5533631065576425e-05, + "loss": 0.2408, + "step": 17056, + "teacher_loss": 0.20697185397148132 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.46809470653533936, + "learning_rate": 1.553136124743081e-05, + "loss": 0.2042, + "step": 17057, + "teacher_loss": 0.17489968240261078 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.3487834930419922, + "learning_rate": 1.5529091417102678e-05, + "loss": 0.2456, + "step": 17058, + "teacher_loss": 0.23416444659233093 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.2697889804840088, + "learning_rate": 1.552682157464406e-05, + "loss": 0.1999, + "step": 17059, + "teacher_loss": 0.1921597272157669 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.35291755199432373, + "learning_rate": 1.552455172010701e-05, + "loss": 0.2293, + "step": 17060, + "teacher_loss": 0.2155713438987732 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.4749341607093811, + "learning_rate": 1.5522281853543554e-05, + "loss": 0.2689, + "step": 17061, + "teacher_loss": 0.24605943262577057 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.6117103695869446, + "learning_rate": 1.552001197500575e-05, + "loss": 0.2499, + "step": 17062, + "teacher_loss": 0.2096526026725769 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.42779114842414856, + "learning_rate": 1.551774208454562e-05, + "loss": 0.3988, + "step": 17063, + "teacher_loss": 0.395539253950119 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.23250603675842285, + "learning_rate": 1.551547218221523e-05, + "loss": 0.1492, + "step": 17064, + "teacher_loss": 0.1399080604314804 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.31456995010375977, + "learning_rate": 1.5513202268066593e-05, + "loss": 0.2433, + "step": 17065, + "teacher_loss": 0.2354004979133606 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.8242419958114624, + "learning_rate": 1.5510932342151778e-05, + "loss": 0.2947, + "step": 17066, + "teacher_loss": 0.2358134388923645 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.1256561428308487, + "learning_rate": 1.5508662404522812e-05, + "loss": 0.1372, + "step": 17067, + "teacher_loss": 0.13844355940818787 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 0.6293669939041138, + "learning_rate": 1.5506392455231745e-05, + "loss": 0.3395, + "step": 17068, + "teacher_loss": 0.3072940707206726 + }, + { + "compression_loss": 0.0, + "epoch": 3.08, + "label_loss": 1.000198245048523, + "learning_rate": 1.5504122494330613e-05, + "loss": 0.2622, + "step": 17069, + "teacher_loss": 0.18025052547454834 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.2902330756187439, + "learning_rate": 1.5501852521871463e-05, + "loss": 0.1762, + "step": 17070, + "teacher_loss": 0.1634884625673294 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.5651159286499023, + "learning_rate": 1.5499582537906342e-05, + "loss": 0.3239, + "step": 17071, + "teacher_loss": 0.29711204767227173 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.10558003187179565, + "learning_rate": 1.5497312542487294e-05, + "loss": 0.1529, + "step": 17072, + "teacher_loss": 0.15810570120811462 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.24813322722911835, + "learning_rate": 1.549504253566635e-05, + "loss": 0.163, + "step": 17073, + "teacher_loss": 0.15349268913269043 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.3207513391971588, + "learning_rate": 1.5492772517495577e-05, + "loss": 0.2443, + "step": 17074, + "teacher_loss": 0.2357589602470398 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.8022313714027405, + "learning_rate": 1.5490502488027e-05, + "loss": 0.3246, + "step": 17075, + "teacher_loss": 0.2715368866920471 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.33774176239967346, + "learning_rate": 1.5488232447312674e-05, + "loss": 0.1936, + "step": 17076, + "teacher_loss": 0.17758655548095703 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.4840281307697296, + "learning_rate": 1.5485962395404646e-05, + "loss": 0.2115, + "step": 17077, + "teacher_loss": 0.18116378784179688 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.6643401384353638, + "learning_rate": 1.548369233235495e-05, + "loss": 0.2726, + "step": 17078, + "teacher_loss": 0.2290726751089096 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.671335756778717, + "learning_rate": 1.5481422258215643e-05, + "loss": 0.2992, + "step": 17079, + "teacher_loss": 0.2578341066837311 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.5443426370620728, + "learning_rate": 1.5479152173038766e-05, + "loss": 0.2436, + "step": 17080, + "teacher_loss": 0.21020770072937012 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.5381845831871033, + "learning_rate": 1.5476882076876366e-05, + "loss": 0.292, + "step": 17081, + "teacher_loss": 0.2646610736846924 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.1711021065711975, + "learning_rate": 1.5474611969780496e-05, + "loss": 0.1375, + "step": 17082, + "teacher_loss": 0.13379865884780884 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.3942982852458954, + "learning_rate": 1.5472341851803186e-05, + "loss": 0.3023, + "step": 17083, + "teacher_loss": 0.29207664728164673 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.5175685882568359, + "learning_rate": 1.5470071722996502e-05, + "loss": 0.2305, + "step": 17084, + "teacher_loss": 0.19856303930282593 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.3329129219055176, + "learning_rate": 1.546780158341248e-05, + "loss": 0.204, + "step": 17085, + "teacher_loss": 0.1896638572216034 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.45642080903053284, + "learning_rate": 1.5465531433103166e-05, + "loss": 0.224, + "step": 17086, + "teacher_loss": 0.19817125797271729 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.5998003482818604, + "learning_rate": 1.5463261272120615e-05, + "loss": 0.2662, + "step": 17087, + "teacher_loss": 0.22911834716796875 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.34131187200546265, + "learning_rate": 1.5460991100516873e-05, + "loss": 0.193, + "step": 17088, + "teacher_loss": 0.1764889359474182 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.7740792632102966, + "learning_rate": 1.5458720918343987e-05, + "loss": 0.3803, + "step": 17089, + "teacher_loss": 0.33654695749282837 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.1339554786682129, + "learning_rate": 1.5456450725654007e-05, + "loss": 0.1879, + "step": 17090, + "teacher_loss": 0.19389373064041138 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.44405293464660645, + "learning_rate": 1.5454180522498976e-05, + "loss": 0.2867, + "step": 17091, + "teacher_loss": 0.2692013382911682 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.5587217807769775, + "learning_rate": 1.545191030893095e-05, + "loss": 0.248, + "step": 17092, + "teacher_loss": 0.2134218066930771 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.4008667469024658, + "learning_rate": 1.5449640085001977e-05, + "loss": 0.1709, + "step": 17093, + "teacher_loss": 0.14538581669330597 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.288235068321228, + "learning_rate": 1.5447369850764103e-05, + "loss": 0.1963, + "step": 17094, + "teacher_loss": 0.18603640794754028 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.44260019063949585, + "learning_rate": 1.5445099606269378e-05, + "loss": 0.2125, + "step": 17095, + "teacher_loss": 0.1869882494211197 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.6100733280181885, + "learning_rate": 1.5442829351569854e-05, + "loss": 0.26, + "step": 17096, + "teacher_loss": 0.2211434245109558 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.7037139534950256, + "learning_rate": 1.5440559086717583e-05, + "loss": 0.4707, + "step": 17097, + "teacher_loss": 0.44484108686447144 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.32256880402565, + "learning_rate": 1.543828881176461e-05, + "loss": 0.1997, + "step": 17098, + "teacher_loss": 0.18600735068321228 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.6091248989105225, + "learning_rate": 1.5436018526762992e-05, + "loss": 0.2592, + "step": 17099, + "teacher_loss": 0.22032345831394196 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.8146679401397705, + "learning_rate": 1.5433748231764774e-05, + "loss": 0.2706, + "step": 17100, + "teacher_loss": 0.21019107103347778 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.5218662023544312, + "learning_rate": 1.543147792682201e-05, + "loss": 0.1837, + "step": 17101, + "teacher_loss": 0.1460893601179123 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.5056889057159424, + "learning_rate": 1.5429207611986756e-05, + "loss": 0.2189, + "step": 17102, + "teacher_loss": 0.18699193000793457 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.23382161557674408, + "learning_rate": 1.542693728731105e-05, + "loss": 0.289, + "step": 17103, + "teacher_loss": 0.2951442003250122 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.5903704166412354, + "learning_rate": 1.5424666952846958e-05, + "loss": 0.4089, + "step": 17104, + "teacher_loss": 0.3887832462787628 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.6547254323959351, + "learning_rate": 1.542239660864652e-05, + "loss": 0.3367, + "step": 17105, + "teacher_loss": 0.30138659477233887 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.37489405274391174, + "learning_rate": 1.5420126254761797e-05, + "loss": 0.2052, + "step": 17106, + "teacher_loss": 0.18638189136981964 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.5926855802536011, + "learning_rate": 1.5417855891244845e-05, + "loss": 0.2747, + "step": 17107, + "teacher_loss": 0.23941358923912048 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.1621163785457611, + "learning_rate": 1.54155855181477e-05, + "loss": 0.1996, + "step": 17108, + "teacher_loss": 0.2038199007511139 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.40665680170059204, + "learning_rate": 1.5413315135522434e-05, + "loss": 0.1943, + "step": 17109, + "teacher_loss": 0.17071747779846191 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.59168541431427, + "learning_rate": 1.5411044743421084e-05, + "loss": 0.2225, + "step": 17110, + "teacher_loss": 0.18144477903842926 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.30516937375068665, + "learning_rate": 1.5408774341895714e-05, + "loss": 0.1838, + "step": 17111, + "teacher_loss": 0.17028182744979858 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.5123518705368042, + "learning_rate": 1.5406503930998375e-05, + "loss": 0.2533, + "step": 17112, + "teacher_loss": 0.2244957685470581 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.3951176404953003, + "learning_rate": 1.5404233510781112e-05, + "loss": 0.2398, + "step": 17113, + "teacher_loss": 0.22251909971237183 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.39007332921028137, + "learning_rate": 1.5401963081295994e-05, + "loss": 0.1807, + "step": 17114, + "teacher_loss": 0.1574709117412567 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.2659448981285095, + "learning_rate": 1.539969264259507e-05, + "loss": 0.2304, + "step": 17115, + "teacher_loss": 0.2264612317085266 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.4305671453475952, + "learning_rate": 1.5397422194730384e-05, + "loss": 0.2189, + "step": 17116, + "teacher_loss": 0.19543595612049103 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.6756407022476196, + "learning_rate": 1.5395151737754002e-05, + "loss": 0.2883, + "step": 17117, + "teacher_loss": 0.2452574074268341 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.17432333528995514, + "learning_rate": 1.539288127171798e-05, + "loss": 0.1692, + "step": 17118, + "teacher_loss": 0.16858944296836853 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.5327430963516235, + "learning_rate": 1.539061079667436e-05, + "loss": 0.2406, + "step": 17119, + "teacher_loss": 0.20810110867023468 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.27331963181495667, + "learning_rate": 1.5388340312675215e-05, + "loss": 0.2146, + "step": 17120, + "teacher_loss": 0.20806702971458435 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.3281494081020355, + "learning_rate": 1.538606981977258e-05, + "loss": 0.2598, + "step": 17121, + "teacher_loss": 0.2522505819797516 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.8761147260665894, + "learning_rate": 1.5383799318018527e-05, + "loss": 0.2753, + "step": 17122, + "teacher_loss": 0.2085953652858734 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.4426186978816986, + "learning_rate": 1.5381528807465113e-05, + "loss": 0.2395, + "step": 17123, + "teacher_loss": 0.2169855237007141 + }, + { + "compression_loss": 0.0, + "epoch": 3.09, + "label_loss": 0.5183864831924438, + "learning_rate": 1.5379258288164375e-05, + "loss": 0.5789, + "step": 17124, + "teacher_loss": 0.5856744050979614 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.5461440682411194, + "learning_rate": 1.537698776016839e-05, + "loss": 0.3671, + "step": 17125, + "teacher_loss": 0.34719714522361755 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.16817721724510193, + "learning_rate": 1.5374717223529203e-05, + "loss": 0.1235, + "step": 17126, + "teacher_loss": 0.118507519364357 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.16240057349205017, + "learning_rate": 1.5372446678298876e-05, + "loss": 0.1915, + "step": 17127, + "teacher_loss": 0.1947633922100067 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.07226384431123734, + "learning_rate": 1.5370176124529457e-05, + "loss": 0.1392, + "step": 17128, + "teacher_loss": 0.1465841829776764 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.3599405288696289, + "learning_rate": 1.5367905562273016e-05, + "loss": 0.2177, + "step": 17129, + "teacher_loss": 0.20191195607185364 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.44051820039749146, + "learning_rate": 1.53656349915816e-05, + "loss": 0.2503, + "step": 17130, + "teacher_loss": 0.22919318079948425 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.43584781885147095, + "learning_rate": 1.5363364412507272e-05, + "loss": 0.2541, + "step": 17131, + "teacher_loss": 0.23385834693908691 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.38961225748062134, + "learning_rate": 1.5361093825102087e-05, + "loss": 0.2901, + "step": 17132, + "teacher_loss": 0.2789991497993469 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.4471387267112732, + "learning_rate": 1.53588232294181e-05, + "loss": 0.2394, + "step": 17133, + "teacher_loss": 0.21637234091758728 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.4134955406188965, + "learning_rate": 1.5356552625507377e-05, + "loss": 0.2378, + "step": 17134, + "teacher_loss": 0.21828094124794006 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.15211328864097595, + "learning_rate": 1.535428201342197e-05, + "loss": 0.1786, + "step": 17135, + "teacher_loss": 0.18156926333904266 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.2574191987514496, + "learning_rate": 1.5352011393213932e-05, + "loss": 0.1849, + "step": 17136, + "teacher_loss": 0.1768171787261963 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.3325204849243164, + "learning_rate": 1.5349740764935342e-05, + "loss": 0.2647, + "step": 17137, + "teacher_loss": 0.2572069466114044 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.8556715250015259, + "learning_rate": 1.5347470128638233e-05, + "loss": 0.2253, + "step": 17138, + "teacher_loss": 0.15525224804878235 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.25372314453125, + "learning_rate": 1.534519948437468e-05, + "loss": 0.2383, + "step": 17139, + "teacher_loss": 0.23653483390808105 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.7254093885421753, + "learning_rate": 1.534292883219674e-05, + "loss": 0.3888, + "step": 17140, + "teacher_loss": 0.3513984680175781 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.3339449167251587, + "learning_rate": 1.534065817215646e-05, + "loss": 0.1849, + "step": 17141, + "teacher_loss": 0.16836772859096527 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.5454400181770325, + "learning_rate": 1.5338387504305924e-05, + "loss": 0.191, + "step": 17142, + "teacher_loss": 0.15158015489578247 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.20470893383026123, + "learning_rate": 1.5336116828697165e-05, + "loss": 0.2203, + "step": 17143, + "teacher_loss": 0.22203657031059265 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.4558107256889343, + "learning_rate": 1.5333846145382256e-05, + "loss": 0.2508, + "step": 17144, + "teacher_loss": 0.22805365920066833 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.2915193736553192, + "learning_rate": 1.5331575454413263e-05, + "loss": 0.2303, + "step": 17145, + "teacher_loss": 0.22344574332237244 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.32340073585510254, + "learning_rate": 1.5329304755842237e-05, + "loss": 0.2881, + "step": 17146, + "teacher_loss": 0.2841394543647766 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.13586458563804626, + "learning_rate": 1.5327034049721238e-05, + "loss": 0.1963, + "step": 17147, + "teacher_loss": 0.20306968688964844 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.3415514826774597, + "learning_rate": 1.5324763336102333e-05, + "loss": 0.1447, + "step": 17148, + "teacher_loss": 0.12282686680555344 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.49278968572616577, + "learning_rate": 1.532249261503757e-05, + "loss": 0.2198, + "step": 17149, + "teacher_loss": 0.18943192064762115 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.551611602306366, + "learning_rate": 1.5320221886579022e-05, + "loss": 0.4034, + "step": 17150, + "teacher_loss": 0.3869735598564148 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.29609113931655884, + "learning_rate": 1.531795115077875e-05, + "loss": 0.282, + "step": 17151, + "teacher_loss": 0.28047001361846924 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.5154891014099121, + "learning_rate": 1.531568040768881e-05, + "loss": 0.2672, + "step": 17152, + "teacher_loss": 0.23960211873054504 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.7545921802520752, + "learning_rate": 1.5313409657361263e-05, + "loss": 0.27, + "step": 17153, + "teacher_loss": 0.21612633764743805 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.32348132133483887, + "learning_rate": 1.531113889984817e-05, + "loss": 0.2136, + "step": 17154, + "teacher_loss": 0.20138849318027496 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.44227826595306396, + "learning_rate": 1.5308868135201597e-05, + "loss": 0.2403, + "step": 17155, + "teacher_loss": 0.21786090731620789 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.33001893758773804, + "learning_rate": 1.5306597363473607e-05, + "loss": 0.2259, + "step": 17156, + "teacher_loss": 0.21437734365463257 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.15579232573509216, + "learning_rate": 1.5304326584716255e-05, + "loss": 0.1375, + "step": 17157, + "teacher_loss": 0.13541460037231445 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.1577894389629364, + "learning_rate": 1.5302055798981606e-05, + "loss": 0.1866, + "step": 17158, + "teacher_loss": 0.18984350562095642 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.2677778899669647, + "learning_rate": 1.5299785006321723e-05, + "loss": 0.2198, + "step": 17159, + "teacher_loss": 0.21442213654518127 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.17450259625911713, + "learning_rate": 1.5297514206788673e-05, + "loss": 0.1593, + "step": 17160, + "teacher_loss": 0.15759287774562836 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.3964899778366089, + "learning_rate": 1.529524340043451e-05, + "loss": 0.2236, + "step": 17161, + "teacher_loss": 0.2043883502483368 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.41697049140930176, + "learning_rate": 1.5292972587311305e-05, + "loss": 0.2236, + "step": 17162, + "teacher_loss": 0.20210015773773193 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.35055863857269287, + "learning_rate": 1.5290701767471114e-05, + "loss": 0.2626, + "step": 17163, + "teacher_loss": 0.25284022092819214 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.4551340639591217, + "learning_rate": 1.5288430940966e-05, + "loss": 0.3041, + "step": 17164, + "teacher_loss": 0.28732168674468994 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.5325958728790283, + "learning_rate": 1.5286160107848036e-05, + "loss": 0.4214, + "step": 17165, + "teacher_loss": 0.4090902805328369 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.32873302698135376, + "learning_rate": 1.5283889268169274e-05, + "loss": 0.271, + "step": 17166, + "teacher_loss": 0.264535129070282 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.1491089165210724, + "learning_rate": 1.5281618421981788e-05, + "loss": 0.2093, + "step": 17167, + "teacher_loss": 0.21599061787128448 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.41085103154182434, + "learning_rate": 1.5279347569337627e-05, + "loss": 0.3831, + "step": 17168, + "teacher_loss": 0.38001060485839844 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.39177989959716797, + "learning_rate": 1.5277076710288867e-05, + "loss": 0.3015, + "step": 17169, + "teacher_loss": 0.2914811670780182 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.9250391125679016, + "learning_rate": 1.5274805844887574e-05, + "loss": 0.312, + "step": 17170, + "teacher_loss": 0.24393875896930695 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.36199951171875, + "learning_rate": 1.5272534973185802e-05, + "loss": 0.311, + "step": 17171, + "teacher_loss": 0.3053860068321228 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.24936191737651825, + "learning_rate": 1.527026409523562e-05, + "loss": 0.208, + "step": 17172, + "teacher_loss": 0.2034320831298828 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.2587032914161682, + "learning_rate": 1.5267993211089095e-05, + "loss": 0.1899, + "step": 17173, + "teacher_loss": 0.18229596316814423 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.17346785962581635, + "learning_rate": 1.5265722320798284e-05, + "loss": 0.1878, + "step": 17174, + "teacher_loss": 0.18940353393554688 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.5105569362640381, + "learning_rate": 1.5263451424415267e-05, + "loss": 0.2487, + "step": 17175, + "teacher_loss": 0.21955807507038116 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.20995180308818817, + "learning_rate": 1.526118052199209e-05, + "loss": 0.1509, + "step": 17176, + "teacher_loss": 0.14435049891471863 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.14361567795276642, + "learning_rate": 1.525890961358083e-05, + "loss": 0.2067, + "step": 17177, + "teacher_loss": 0.2136983871459961 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.6527017951011658, + "learning_rate": 1.5256638699233548e-05, + "loss": 0.3164, + "step": 17178, + "teacher_loss": 0.27902650833129883 + }, + { + "compression_loss": 0.0, + "epoch": 3.1, + "label_loss": 0.31982672214508057, + "learning_rate": 1.5254367779002309e-05, + "loss": 0.2659, + "step": 17179, + "teacher_loss": 0.2599627375602722 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.5161340236663818, + "learning_rate": 1.5252096852939181e-05, + "loss": 0.2354, + "step": 17180, + "teacher_loss": 0.2042478621006012 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.46684837341308594, + "learning_rate": 1.5249825921096228e-05, + "loss": 0.2445, + "step": 17181, + "teacher_loss": 0.21979549527168274 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.6748241186141968, + "learning_rate": 1.5247554983525516e-05, + "loss": 0.2707, + "step": 17182, + "teacher_loss": 0.22574448585510254 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.7047570943832397, + "learning_rate": 1.5245284040279111e-05, + "loss": 0.3171, + "step": 17183, + "teacher_loss": 0.27403441071510315 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.378791868686676, + "learning_rate": 1.5243013091409073e-05, + "loss": 0.2907, + "step": 17184, + "teacher_loss": 0.28095754981040955 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.22697332501411438, + "learning_rate": 1.5240742136967477e-05, + "loss": 0.201, + "step": 17185, + "teacher_loss": 0.1981421411037445 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.4791354835033417, + "learning_rate": 1.5238471177006388e-05, + "loss": 0.2939, + "step": 17186, + "teacher_loss": 0.27332431077957153 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.34185707569122314, + "learning_rate": 1.5236200211577868e-05, + "loss": 0.2217, + "step": 17187, + "teacher_loss": 0.20837756991386414 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.6027224659919739, + "learning_rate": 1.5233929240733981e-05, + "loss": 0.2758, + "step": 17188, + "teacher_loss": 0.23944266140460968 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.49354612827301025, + "learning_rate": 1.5231658264526806e-05, + "loss": 0.2485, + "step": 17189, + "teacher_loss": 0.22126665711402893 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.3478447198867798, + "learning_rate": 1.5229387283008398e-05, + "loss": 0.1734, + "step": 17190, + "teacher_loss": 0.15400651097297668 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.3695114850997925, + "learning_rate": 1.5227116296230824e-05, + "loss": 0.3932, + "step": 17191, + "teacher_loss": 0.39582574367523193 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.20844215154647827, + "learning_rate": 1.5224845304246163e-05, + "loss": 0.1798, + "step": 17192, + "teacher_loss": 0.17656593024730682 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.8951176404953003, + "learning_rate": 1.5222574307106463e-05, + "loss": 0.2144, + "step": 17193, + "teacher_loss": 0.1387416124343872 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.7001396417617798, + "learning_rate": 1.5220303304863807e-05, + "loss": 0.2505, + "step": 17194, + "teacher_loss": 0.20050784945487976 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.45884379744529724, + "learning_rate": 1.5218032297570255e-05, + "loss": 0.1982, + "step": 17195, + "teacher_loss": 0.16922956705093384 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.5640110373497009, + "learning_rate": 1.5215761285277874e-05, + "loss": 0.2644, + "step": 17196, + "teacher_loss": 0.23106548190116882 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.1853107511997223, + "learning_rate": 1.5213490268038736e-05, + "loss": 0.1897, + "step": 17197, + "teacher_loss": 0.19020459055900574 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.3523780107498169, + "learning_rate": 1.5211219245904908e-05, + "loss": 0.2902, + "step": 17198, + "teacher_loss": 0.2832720875740051 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.7154030799865723, + "learning_rate": 1.5208948218928453e-05, + "loss": 0.266, + "step": 17199, + "teacher_loss": 0.21611285209655762 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.688982367515564, + "learning_rate": 1.5206677187161447e-05, + "loss": 0.3168, + "step": 17200, + "teacher_loss": 0.27550023794174194 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.31435054540634155, + "learning_rate": 1.520440615065595e-05, + "loss": 0.1772, + "step": 17201, + "teacher_loss": 0.16199825704097748 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.3338039815425873, + "learning_rate": 1.5202135109464032e-05, + "loss": 0.1847, + "step": 17202, + "teacher_loss": 0.168122798204422 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.17167316377162933, + "learning_rate": 1.5199864063637761e-05, + "loss": 0.1637, + "step": 17203, + "teacher_loss": 0.16282892227172852 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.28445351123809814, + "learning_rate": 1.5197593013229204e-05, + "loss": 0.1555, + "step": 17204, + "teacher_loss": 0.14117391407489777 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.16458380222320557, + "learning_rate": 1.519532195829044e-05, + "loss": 0.1644, + "step": 17205, + "teacher_loss": 0.16439564526081085 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.3590165376663208, + "learning_rate": 1.5193050898873522e-05, + "loss": 0.2734, + "step": 17206, + "teacher_loss": 0.2638833522796631 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.6366062164306641, + "learning_rate": 1.5190779835030527e-05, + "loss": 0.2074, + "step": 17207, + "teacher_loss": 0.15966284275054932 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.8787311315536499, + "learning_rate": 1.5188508766813525e-05, + "loss": 0.3406, + "step": 17208, + "teacher_loss": 0.2808261513710022 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 1.0711147785186768, + "learning_rate": 1.5186237694274577e-05, + "loss": 0.3659, + "step": 17209, + "teacher_loss": 0.28753867745399475 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.3061642646789551, + "learning_rate": 1.518396661746576e-05, + "loss": 0.21, + "step": 17210, + "teacher_loss": 0.19927716255187988 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.4438420534133911, + "learning_rate": 1.518169553643914e-05, + "loss": 0.2197, + "step": 17211, + "teacher_loss": 0.19478917121887207 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.3180913031101227, + "learning_rate": 1.5179424451246787e-05, + "loss": 0.2041, + "step": 17212, + "teacher_loss": 0.19147509336471558 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.1515028029680252, + "learning_rate": 1.517715336194077e-05, + "loss": 0.1455, + "step": 17213, + "teacher_loss": 0.14481839537620544 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.12458354979753494, + "learning_rate": 1.5174882268573153e-05, + "loss": 0.1771, + "step": 17214, + "teacher_loss": 0.1829504519701004 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.2823290228843689, + "learning_rate": 1.5172611171196014e-05, + "loss": 0.1686, + "step": 17215, + "teacher_loss": 0.15591681003570557 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.28266263008117676, + "learning_rate": 1.517034006986142e-05, + "loss": 0.3185, + "step": 17216, + "teacher_loss": 0.3224581480026245 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.370505154132843, + "learning_rate": 1.5168068964621436e-05, + "loss": 0.2427, + "step": 17217, + "teacher_loss": 0.22850686311721802 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.1539417803287506, + "learning_rate": 1.516579785552813e-05, + "loss": 0.1598, + "step": 17218, + "teacher_loss": 0.16047051548957825 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.24063509702682495, + "learning_rate": 1.5163526742633583e-05, + "loss": 0.2104, + "step": 17219, + "teacher_loss": 0.20701248943805695 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.3107471168041229, + "learning_rate": 1.5161255625989859e-05, + "loss": 0.229, + "step": 17220, + "teacher_loss": 0.21993786096572876 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.4418913424015045, + "learning_rate": 1.5158984505649021e-05, + "loss": 0.2695, + "step": 17221, + "teacher_loss": 0.25036370754241943 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.4919448792934418, + "learning_rate": 1.5156713381663148e-05, + "loss": 0.3191, + "step": 17222, + "teacher_loss": 0.29993951320648193 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.4021309018135071, + "learning_rate": 1.5154442254084307e-05, + "loss": 0.233, + "step": 17223, + "teacher_loss": 0.2142338901758194 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.35398128628730774, + "learning_rate": 1.5152171122964569e-05, + "loss": 0.2655, + "step": 17224, + "teacher_loss": 0.255683958530426 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.6045404076576233, + "learning_rate": 1.5149899988356005e-05, + "loss": 0.2901, + "step": 17225, + "teacher_loss": 0.2551615536212921 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.12888306379318237, + "learning_rate": 1.5147628850310675e-05, + "loss": 0.1842, + "step": 17226, + "teacher_loss": 0.19036611914634705 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.23146682977676392, + "learning_rate": 1.5145357708880667e-05, + "loss": 0.2177, + "step": 17227, + "teacher_loss": 0.21614739298820496 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.954110860824585, + "learning_rate": 1.5143086564118042e-05, + "loss": 0.2696, + "step": 17228, + "teacher_loss": 0.19348779320716858 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.19229744374752045, + "learning_rate": 1.5140815416074864e-05, + "loss": 0.1845, + "step": 17229, + "teacher_loss": 0.1836622804403305 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.5042635202407837, + "learning_rate": 1.513854426480322e-05, + "loss": 0.3646, + "step": 17230, + "teacher_loss": 0.3491074740886688 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.6336063742637634, + "learning_rate": 1.5136273110355166e-05, + "loss": 0.2297, + "step": 17231, + "teacher_loss": 0.18479332327842712 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.4356980621814728, + "learning_rate": 1.5134001952782778e-05, + "loss": 0.2233, + "step": 17232, + "teacher_loss": 0.19973576068878174 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.6457315683364868, + "learning_rate": 1.513173079213813e-05, + "loss": 0.2235, + "step": 17233, + "teacher_loss": 0.17659424245357513 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.6848845481872559, + "learning_rate": 1.5129459628473284e-05, + "loss": 0.3156, + "step": 17234, + "teacher_loss": 0.2745181918144226 + }, + { + "compression_loss": 0.0, + "epoch": 3.11, + "label_loss": 0.20801648497581482, + "learning_rate": 1.5127188461840327e-05, + "loss": 0.1317, + "step": 17235, + "teacher_loss": 0.12322592735290527 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.5595203042030334, + "learning_rate": 1.5124917292291312e-05, + "loss": 0.4279, + "step": 17236, + "teacher_loss": 0.41329264640808105 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.6127294301986694, + "learning_rate": 1.512264611987832e-05, + "loss": 0.2432, + "step": 17237, + "teacher_loss": 0.20211821794509888 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.17504458129405975, + "learning_rate": 1.5120374944653419e-05, + "loss": 0.1633, + "step": 17238, + "teacher_loss": 0.1620059609413147 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.2954214811325073, + "learning_rate": 1.5118103766668682e-05, + "loss": 0.2111, + "step": 17239, + "teacher_loss": 0.20170819759368896 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.35241201519966125, + "learning_rate": 1.5115832585976181e-05, + "loss": 0.187, + "step": 17240, + "teacher_loss": 0.16860079765319824 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.3200010359287262, + "learning_rate": 1.5113561402627983e-05, + "loss": 0.1986, + "step": 17241, + "teacher_loss": 0.18508964776992798 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.44506824016571045, + "learning_rate": 1.5111290216676162e-05, + "loss": 0.2359, + "step": 17242, + "teacher_loss": 0.2126464992761612 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.4178299605846405, + "learning_rate": 1.5109019028172796e-05, + "loss": 0.2529, + "step": 17243, + "teacher_loss": 0.2345495969057083 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.5821305513381958, + "learning_rate": 1.5106747837169949e-05, + "loss": 0.2204, + "step": 17244, + "teacher_loss": 0.18020527064800262 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.5139521360397339, + "learning_rate": 1.5104476643719692e-05, + "loss": 0.2246, + "step": 17245, + "teacher_loss": 0.19250378012657166 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.48922663927078247, + "learning_rate": 1.5102205447874098e-05, + "loss": 0.2575, + "step": 17246, + "teacher_loss": 0.23173211514949799 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.2536476254463196, + "learning_rate": 1.509993424968524e-05, + "loss": 0.2202, + "step": 17247, + "teacher_loss": 0.2164355218410492 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.34476983547210693, + "learning_rate": 1.509766304920519e-05, + "loss": 0.1837, + "step": 17248, + "teacher_loss": 0.16580624878406525 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.7201471328735352, + "learning_rate": 1.5095391846486018e-05, + "loss": 0.243, + "step": 17249, + "teacher_loss": 0.1899665743112564 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.3451116681098938, + "learning_rate": 1.50931206415798e-05, + "loss": 0.1725, + "step": 17250, + "teacher_loss": 0.15337088704109192 + }, + { + "epoch": 3.12, + "eval_exact_match": 79.73509933774834, + "eval_f1": 87.31426402735542, + "step": 17250 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.32742929458618164, + "learning_rate": 1.5090849434538602e-05, + "loss": 0.2051, + "step": 17251, + "teacher_loss": 0.19149544835090637 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.2632191479206085, + "learning_rate": 1.50885782254145e-05, + "loss": 0.1434, + "step": 17252, + "teacher_loss": 0.13006706535816193 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.7957450747489929, + "learning_rate": 1.5086307014259567e-05, + "loss": 0.3074, + "step": 17253, + "teacher_loss": 0.2530926465988159 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.400049090385437, + "learning_rate": 1.5084035801125865e-05, + "loss": 0.2307, + "step": 17254, + "teacher_loss": 0.21188125014305115 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.24235990643501282, + "learning_rate": 1.5081764586065484e-05, + "loss": 0.2006, + "step": 17255, + "teacher_loss": 0.19593161344528198 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.7102748155593872, + "learning_rate": 1.5079493369130481e-05, + "loss": 0.2861, + "step": 17256, + "teacher_loss": 0.23894359171390533 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.6256086826324463, + "learning_rate": 1.5077222150372933e-05, + "loss": 0.1996, + "step": 17257, + "teacher_loss": 0.15222826600074768 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.17724823951721191, + "learning_rate": 1.5074950929844916e-05, + "loss": 0.1784, + "step": 17258, + "teacher_loss": 0.17855240404605865 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.20423980057239532, + "learning_rate": 1.5072679707598495e-05, + "loss": 0.175, + "step": 17259, + "teacher_loss": 0.17169588804244995 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.7947405576705933, + "learning_rate": 1.507040848368575e-05, + "loss": 0.2721, + "step": 17260, + "teacher_loss": 0.2140299677848816 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.394207239151001, + "learning_rate": 1.5068137258158746e-05, + "loss": 0.2206, + "step": 17261, + "teacher_loss": 0.20128047466278076 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.17013733088970184, + "learning_rate": 1.5065866031069561e-05, + "loss": 0.2894, + "step": 17262, + "teacher_loss": 0.3026968538761139 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.5336381196975708, + "learning_rate": 1.5063594802470264e-05, + "loss": 0.2127, + "step": 17263, + "teacher_loss": 0.1770850121974945 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.33640623092651367, + "learning_rate": 1.5061323572412927e-05, + "loss": 0.2624, + "step": 17264, + "teacher_loss": 0.25413405895233154 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.6635050773620605, + "learning_rate": 1.5059052340949631e-05, + "loss": 0.2783, + "step": 17265, + "teacher_loss": 0.23552128672599792 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.27809059619903564, + "learning_rate": 1.5056781108132435e-05, + "loss": 0.2337, + "step": 17266, + "teacher_loss": 0.22874122858047485 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.7809144258499146, + "learning_rate": 1.5054509874013421e-05, + "loss": 0.3121, + "step": 17267, + "teacher_loss": 0.26003915071487427 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.1828133463859558, + "learning_rate": 1.5052238638644663e-05, + "loss": 0.1581, + "step": 17268, + "teacher_loss": 0.1553703397512436 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.2469458281993866, + "learning_rate": 1.5049967402078224e-05, + "loss": 0.187, + "step": 17269, + "teacher_loss": 0.18037953972816467 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.5360699892044067, + "learning_rate": 1.5047696164366183e-05, + "loss": 0.2615, + "step": 17270, + "teacher_loss": 0.2310405969619751 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.5356260538101196, + "learning_rate": 1.5045424925560613e-05, + "loss": 0.3302, + "step": 17271, + "teacher_loss": 0.3074026107788086 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.2949972152709961, + "learning_rate": 1.5043153685713584e-05, + "loss": 0.2089, + "step": 17272, + "teacher_loss": 0.19932758808135986 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.3311798572540283, + "learning_rate": 1.5040882444877173e-05, + "loss": 0.2475, + "step": 17273, + "teacher_loss": 0.23816128075122833 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.8101736307144165, + "learning_rate": 1.5038611203103451e-05, + "loss": 0.3525, + "step": 17274, + "teacher_loss": 0.3016057014465332 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.3346106708049774, + "learning_rate": 1.5036339960444488e-05, + "loss": 0.2306, + "step": 17275, + "teacher_loss": 0.21909326314926147 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.6711146235466003, + "learning_rate": 1.5034068716952361e-05, + "loss": 0.4125, + "step": 17276, + "teacher_loss": 0.3837779760360718 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.3212396502494812, + "learning_rate": 1.5031797472679136e-05, + "loss": 0.1883, + "step": 17277, + "teacher_loss": 0.17357513308525085 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.49728041887283325, + "learning_rate": 1.5029526227676891e-05, + "loss": 0.248, + "step": 17278, + "teacher_loss": 0.2202717512845993 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.21826279163360596, + "learning_rate": 1.50272549819977e-05, + "loss": 0.2022, + "step": 17279, + "teacher_loss": 0.20043213665485382 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.4382314085960388, + "learning_rate": 1.5024983735693635e-05, + "loss": 0.2304, + "step": 17280, + "teacher_loss": 0.20733627676963806 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.3459300398826599, + "learning_rate": 1.5022712488816767e-05, + "loss": 0.2495, + "step": 17281, + "teacher_loss": 0.23879435658454895 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.33973464369773865, + "learning_rate": 1.5020441241419167e-05, + "loss": 0.224, + "step": 17282, + "teacher_loss": 0.2111472189426422 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.46012091636657715, + "learning_rate": 1.5018169993552916e-05, + "loss": 0.2198, + "step": 17283, + "teacher_loss": 0.19307363033294678 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.6901553273200989, + "learning_rate": 1.5015898745270074e-05, + "loss": 0.3467, + "step": 17284, + "teacher_loss": 0.3085027039051056 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.20587536692619324, + "learning_rate": 1.5013627496622732e-05, + "loss": 0.24, + "step": 17285, + "teacher_loss": 0.2437414526939392 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.6419857740402222, + "learning_rate": 1.5011356247662942e-05, + "loss": 0.2972, + "step": 17286, + "teacher_loss": 0.2588920295238495 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.28395235538482666, + "learning_rate": 1.5009084998442793e-05, + "loss": 0.183, + "step": 17287, + "teacher_loss": 0.1718360185623169 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.3092729449272156, + "learning_rate": 1.5006813749014353e-05, + "loss": 0.2128, + "step": 17288, + "teacher_loss": 0.2020946741104126 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.3032781183719635, + "learning_rate": 1.5004542499429686e-05, + "loss": 0.2324, + "step": 17289, + "teacher_loss": 0.224469393491745 + }, + { + "compression_loss": 0.0, + "epoch": 3.12, + "label_loss": 0.8242799639701843, + "learning_rate": 1.5002271249740886e-05, + "loss": 0.2882, + "step": 17290, + "teacher_loss": 0.22858509421348572 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.3714783787727356, + "learning_rate": 1.5e-05, + "loss": 0.2344, + "step": 17291, + "teacher_loss": 0.2191598117351532 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.3011588454246521, + "learning_rate": 1.4997728750259122e-05, + "loss": 0.2366, + "step": 17292, + "teacher_loss": 0.22947613894939423 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.5027822256088257, + "learning_rate": 1.4995457500570312e-05, + "loss": 0.3898, + "step": 17293, + "teacher_loss": 0.3772868812084198 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.26266777515411377, + "learning_rate": 1.4993186250985651e-05, + "loss": 0.2882, + "step": 17294, + "teacher_loss": 0.29101717472076416 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.4714064598083496, + "learning_rate": 1.4990915001557211e-05, + "loss": 0.2854, + "step": 17295, + "teacher_loss": 0.26474443078041077 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.8741016387939453, + "learning_rate": 1.4988643752337058e-05, + "loss": 0.3245, + "step": 17296, + "teacher_loss": 0.26347556710243225 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.24074003100395203, + "learning_rate": 1.4986372503377272e-05, + "loss": 0.1833, + "step": 17297, + "teacher_loss": 0.17694228887557983 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.45717424154281616, + "learning_rate": 1.4984101254729928e-05, + "loss": 0.3024, + "step": 17298, + "teacher_loss": 0.28524285554885864 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.14835584163665771, + "learning_rate": 1.4981830006447084e-05, + "loss": 0.2015, + "step": 17299, + "teacher_loss": 0.20742233097553253 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 1.518599510192871, + "learning_rate": 1.4979558758580835e-05, + "loss": 0.3251, + "step": 17300, + "teacher_loss": 0.19244027137756348 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.5807650089263916, + "learning_rate": 1.4977287511183238e-05, + "loss": 0.247, + "step": 17301, + "teacher_loss": 0.20989114046096802 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.7127798795700073, + "learning_rate": 1.497501626430637e-05, + "loss": 0.3071, + "step": 17302, + "teacher_loss": 0.2620091438293457 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.23384368419647217, + "learning_rate": 1.49727450180023e-05, + "loss": 0.1886, + "step": 17303, + "teacher_loss": 0.1836071014404297 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.3913388252258301, + "learning_rate": 1.497047377232311e-05, + "loss": 0.2028, + "step": 17304, + "teacher_loss": 0.18187090754508972 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.3458684980869293, + "learning_rate": 1.4968202527320868e-05, + "loss": 0.198, + "step": 17305, + "teacher_loss": 0.18154026567935944 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.5905870795249939, + "learning_rate": 1.4965931283047643e-05, + "loss": 0.2405, + "step": 17306, + "teacher_loss": 0.20164187252521515 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.4692143201828003, + "learning_rate": 1.4963660039555515e-05, + "loss": 0.2561, + "step": 17307, + "teacher_loss": 0.23243489861488342 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.5120751857757568, + "learning_rate": 1.4961388796896556e-05, + "loss": 0.2353, + "step": 17308, + "teacher_loss": 0.20450431108474731 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.5066673755645752, + "learning_rate": 1.4959117555122826e-05, + "loss": 0.3804, + "step": 17309, + "teacher_loss": 0.36641961336135864 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.6234026551246643, + "learning_rate": 1.4956846314286418e-05, + "loss": 0.3004, + "step": 17310, + "teacher_loss": 0.2645578980445862 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.6348604559898376, + "learning_rate": 1.4954575074439392e-05, + "loss": 0.2721, + "step": 17311, + "teacher_loss": 0.23182141780853271 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.38520652055740356, + "learning_rate": 1.4952303835633817e-05, + "loss": 0.2117, + "step": 17312, + "teacher_loss": 0.19240404665470123 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.30941373109817505, + "learning_rate": 1.4950032597921779e-05, + "loss": 0.1894, + "step": 17313, + "teacher_loss": 0.1760375052690506 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.5903949737548828, + "learning_rate": 1.4947761361355344e-05, + "loss": 0.2302, + "step": 17314, + "teacher_loss": 0.19021350145339966 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.4451650381088257, + "learning_rate": 1.494549012598658e-05, + "loss": 0.2436, + "step": 17315, + "teacher_loss": 0.2211552858352661 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.3629075586795807, + "learning_rate": 1.4943218891867567e-05, + "loss": 0.2752, + "step": 17316, + "teacher_loss": 0.26550936698913574 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.8448565006256104, + "learning_rate": 1.4940947659050374e-05, + "loss": 0.305, + "step": 17317, + "teacher_loss": 0.2449950873851776 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.7690628170967102, + "learning_rate": 1.4938676427587073e-05, + "loss": 0.3355, + "step": 17318, + "teacher_loss": 0.2872878909111023 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 1.2261710166931152, + "learning_rate": 1.4936405197529739e-05, + "loss": 0.3367, + "step": 17319, + "teacher_loss": 0.23790243268013 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.22403885424137115, + "learning_rate": 1.4934133968930441e-05, + "loss": 0.1844, + "step": 17320, + "teacher_loss": 0.1800086498260498 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.17598478496074677, + "learning_rate": 1.4931862741841253e-05, + "loss": 0.1978, + "step": 17321, + "teacher_loss": 0.20019902288913727 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.6885719895362854, + "learning_rate": 1.4929591516314251e-05, + "loss": 0.256, + "step": 17322, + "teacher_loss": 0.20796188712120056 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.22574710845947266, + "learning_rate": 1.492732029240151e-05, + "loss": 0.1682, + "step": 17323, + "teacher_loss": 0.16186118125915527 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.18088002502918243, + "learning_rate": 1.4925049070155091e-05, + "loss": 0.167, + "step": 17324, + "teacher_loss": 0.1654605120420456 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.34204718470573425, + "learning_rate": 1.4922777849627068e-05, + "loss": 0.1736, + "step": 17325, + "teacher_loss": 0.15493807196617126 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.6257104277610779, + "learning_rate": 1.4920506630869523e-05, + "loss": 0.2052, + "step": 17326, + "teacher_loss": 0.15847787261009216 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.31872308254241943, + "learning_rate": 1.491823541393452e-05, + "loss": 0.2294, + "step": 17327, + "teacher_loss": 0.21951383352279663 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.5625432729721069, + "learning_rate": 1.4915964198874133e-05, + "loss": 0.3538, + "step": 17328, + "teacher_loss": 0.3305727243423462 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.6109493970870972, + "learning_rate": 1.4913692985740438e-05, + "loss": 0.2188, + "step": 17329, + "teacher_loss": 0.17526112496852875 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.8130006790161133, + "learning_rate": 1.4911421774585503e-05, + "loss": 0.3233, + "step": 17330, + "teacher_loss": 0.2689291536808014 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.2950381338596344, + "learning_rate": 1.4909150565461397e-05, + "loss": 0.1835, + "step": 17331, + "teacher_loss": 0.17115579545497894 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.2552592158317566, + "learning_rate": 1.4906879358420201e-05, + "loss": 0.1449, + "step": 17332, + "teacher_loss": 0.13260522484779358 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.9623233079910278, + "learning_rate": 1.4904608153513986e-05, + "loss": 0.3492, + "step": 17333, + "teacher_loss": 0.28105485439300537 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.12002383917570114, + "learning_rate": 1.4902336950794808e-05, + "loss": 0.1455, + "step": 17334, + "teacher_loss": 0.1483183205127716 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.32650214433670044, + "learning_rate": 1.4900065750314762e-05, + "loss": 0.2309, + "step": 17335, + "teacher_loss": 0.22024744749069214 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 1.1242852210998535, + "learning_rate": 1.4897794552125906e-05, + "loss": 0.5022, + "step": 17336, + "teacher_loss": 0.43306225538253784 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.3050174415111542, + "learning_rate": 1.4895523356280312e-05, + "loss": 0.1474, + "step": 17337, + "teacher_loss": 0.12990710139274597 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.1475551426410675, + "learning_rate": 1.4893252162830055e-05, + "loss": 0.236, + "step": 17338, + "teacher_loss": 0.24583858251571655 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.25043943524360657, + "learning_rate": 1.4890980971827206e-05, + "loss": 0.218, + "step": 17339, + "teacher_loss": 0.2144230157136917 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.6438193321228027, + "learning_rate": 1.4888709783323835e-05, + "loss": 0.4636, + "step": 17340, + "teacher_loss": 0.4435848295688629 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.544432520866394, + "learning_rate": 1.4886438597372018e-05, + "loss": 0.2158, + "step": 17341, + "teacher_loss": 0.17924290895462036 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.35039812326431274, + "learning_rate": 1.4884167414023823e-05, + "loss": 0.2581, + "step": 17342, + "teacher_loss": 0.24789577722549438 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.42001771926879883, + "learning_rate": 1.4881896233331319e-05, + "loss": 0.2221, + "step": 17343, + "teacher_loss": 0.20007112622261047 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.6026032567024231, + "learning_rate": 1.4879625055346584e-05, + "loss": 0.2893, + "step": 17344, + "teacher_loss": 0.2545427083969116 + }, + { + "compression_loss": 0.0, + "epoch": 3.13, + "label_loss": 0.5349169969558716, + "learning_rate": 1.4877353880121684e-05, + "loss": 0.3016, + "step": 17345, + "teacher_loss": 0.27568987011909485 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.6087794303894043, + "learning_rate": 1.4875082707708696e-05, + "loss": 0.2252, + "step": 17346, + "teacher_loss": 0.18255363404750824 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.7299664616584778, + "learning_rate": 1.4872811538159675e-05, + "loss": 0.2888, + "step": 17347, + "teacher_loss": 0.2397913634777069 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.5684302449226379, + "learning_rate": 1.4870540371526718e-05, + "loss": 0.2399, + "step": 17348, + "teacher_loss": 0.2033928781747818 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.14928099513053894, + "learning_rate": 1.4868269207861877e-05, + "loss": 0.1681, + "step": 17349, + "teacher_loss": 0.17017295956611633 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.323599249124527, + "learning_rate": 1.4865998047217222e-05, + "loss": 0.2584, + "step": 17350, + "teacher_loss": 0.2511252164840698 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.2226908951997757, + "learning_rate": 1.4863726889644838e-05, + "loss": 0.1554, + "step": 17351, + "teacher_loss": 0.14787116646766663 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.27588048577308655, + "learning_rate": 1.4861455735196787e-05, + "loss": 0.1827, + "step": 17352, + "teacher_loss": 0.17239932715892792 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.22517824172973633, + "learning_rate": 1.4859184583925133e-05, + "loss": 0.1901, + "step": 17353, + "teacher_loss": 0.18622267246246338 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.36028891801834106, + "learning_rate": 1.4856913435881963e-05, + "loss": 0.2088, + "step": 17354, + "teacher_loss": 0.19196957349777222 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.608284056186676, + "learning_rate": 1.4854642291119335e-05, + "loss": 0.2621, + "step": 17355, + "teacher_loss": 0.22361181676387787 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.3511553406715393, + "learning_rate": 1.4852371149689322e-05, + "loss": 0.2394, + "step": 17356, + "teacher_loss": 0.22698359191417694 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 1.0278294086456299, + "learning_rate": 1.4850100011644e-05, + "loss": 0.3565, + "step": 17357, + "teacher_loss": 0.28185874223709106 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.33907994627952576, + "learning_rate": 1.4847828877035437e-05, + "loss": 0.2345, + "step": 17358, + "teacher_loss": 0.2228931188583374 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.9033491611480713, + "learning_rate": 1.4845557745915692e-05, + "loss": 0.3607, + "step": 17359, + "teacher_loss": 0.30040591955184937 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.7437979578971863, + "learning_rate": 1.4843286618336856e-05, + "loss": 0.2709, + "step": 17360, + "teacher_loss": 0.21839208900928497 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.546278715133667, + "learning_rate": 1.4841015494350983e-05, + "loss": 0.2817, + "step": 17361, + "teacher_loss": 0.2523575723171234 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.4615083336830139, + "learning_rate": 1.4838744374010142e-05, + "loss": 0.4003, + "step": 17362, + "teacher_loss": 0.393510103225708 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 1.232541799545288, + "learning_rate": 1.483647325736642e-05, + "loss": 0.4144, + "step": 17363, + "teacher_loss": 0.32349854707717896 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.4634174406528473, + "learning_rate": 1.4834202144471872e-05, + "loss": 0.2168, + "step": 17364, + "teacher_loss": 0.18938776850700378 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.5026025176048279, + "learning_rate": 1.483193103537857e-05, + "loss": 0.2513, + "step": 17365, + "teacher_loss": 0.22332298755645752 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.6822613477706909, + "learning_rate": 1.4829659930138585e-05, + "loss": 0.2522, + "step": 17366, + "teacher_loss": 0.20437918603420258 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.10683953762054443, + "learning_rate": 1.4827388828803989e-05, + "loss": 0.1552, + "step": 17367, + "teacher_loss": 0.16059842705726624 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.209193617105484, + "learning_rate": 1.482511773142685e-05, + "loss": 0.1378, + "step": 17368, + "teacher_loss": 0.1298552006483078 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.5235681533813477, + "learning_rate": 1.4822846638059234e-05, + "loss": 0.2145, + "step": 17369, + "teacher_loss": 0.18014344573020935 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.45085757970809937, + "learning_rate": 1.4820575548753215e-05, + "loss": 0.28, + "step": 17370, + "teacher_loss": 0.26097390055656433 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.24813106656074524, + "learning_rate": 1.4818304463560865e-05, + "loss": 0.1768, + "step": 17371, + "teacher_loss": 0.16886979341506958 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.3081740140914917, + "learning_rate": 1.481603338253424e-05, + "loss": 0.1931, + "step": 17372, + "teacher_loss": 0.18032169342041016 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.6924455165863037, + "learning_rate": 1.4813762305725426e-05, + "loss": 0.2884, + "step": 17373, + "teacher_loss": 0.24348080158233643 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.8001669049263, + "learning_rate": 1.4811491233186482e-05, + "loss": 0.3038, + "step": 17374, + "teacher_loss": 0.24869704246520996 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 1.205256700515747, + "learning_rate": 1.4809220164969475e-05, + "loss": 0.3118, + "step": 17375, + "teacher_loss": 0.21257489919662476 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.14333710074424744, + "learning_rate": 1.4806949101126482e-05, + "loss": 0.2143, + "step": 17376, + "teacher_loss": 0.222197026014328 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.10029410570859909, + "learning_rate": 1.4804678041709566e-05, + "loss": 0.1404, + "step": 17377, + "teacher_loss": 0.14483875036239624 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.16031135618686676, + "learning_rate": 1.4802406986770795e-05, + "loss": 0.1612, + "step": 17378, + "teacher_loss": 0.1613508015871048 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.12132205069065094, + "learning_rate": 1.4800135936362243e-05, + "loss": 0.1602, + "step": 17379, + "teacher_loss": 0.16454055905342102 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.4132627248764038, + "learning_rate": 1.4797864890535973e-05, + "loss": 0.4087, + "step": 17380, + "teacher_loss": 0.4081794023513794 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.9257434010505676, + "learning_rate": 1.4795593849344052e-05, + "loss": 0.2569, + "step": 17381, + "teacher_loss": 0.18255430459976196 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.18237711489200592, + "learning_rate": 1.4793322812838555e-05, + "loss": 0.2015, + "step": 17382, + "teacher_loss": 0.20359155535697937 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.2116706669330597, + "learning_rate": 1.479105178107155e-05, + "loss": 0.2096, + "step": 17383, + "teacher_loss": 0.20936527848243713 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.41488876938819885, + "learning_rate": 1.478878075409509e-05, + "loss": 0.3753, + "step": 17384, + "teacher_loss": 0.3709355294704437 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.6006801724433899, + "learning_rate": 1.4786509731961264e-05, + "loss": 0.3384, + "step": 17385, + "teacher_loss": 0.3093082010746002 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.6671277284622192, + "learning_rate": 1.4784238714722129e-05, + "loss": 0.4112, + "step": 17386, + "teacher_loss": 0.38274455070495605 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.5413326621055603, + "learning_rate": 1.478196770242975e-05, + "loss": 0.1904, + "step": 17387, + "teacher_loss": 0.1514175832271576 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.28974029421806335, + "learning_rate": 1.4779696695136197e-05, + "loss": 0.2304, + "step": 17388, + "teacher_loss": 0.22377179563045502 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.884414553642273, + "learning_rate": 1.477742569289354e-05, + "loss": 0.3112, + "step": 17389, + "teacher_loss": 0.24747012555599213 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.16819430887699127, + "learning_rate": 1.4775154695753845e-05, + "loss": 0.1979, + "step": 17390, + "teacher_loss": 0.20117659866809845 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.4513154625892639, + "learning_rate": 1.4772883703769177e-05, + "loss": 0.2656, + "step": 17391, + "teacher_loss": 0.24493777751922607 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.5327795743942261, + "learning_rate": 1.4770612716991606e-05, + "loss": 0.3886, + "step": 17392, + "teacher_loss": 0.3725517988204956 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.5828357338905334, + "learning_rate": 1.4768341735473198e-05, + "loss": 0.2096, + "step": 17393, + "teacher_loss": 0.16809405386447906 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.37763193249702454, + "learning_rate": 1.4766070759266016e-05, + "loss": 0.2846, + "step": 17394, + "teacher_loss": 0.2742539644241333 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.48016589879989624, + "learning_rate": 1.4763799788422135e-05, + "loss": 0.2002, + "step": 17395, + "teacher_loss": 0.16908231377601624 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.5175419449806213, + "learning_rate": 1.4761528822993618e-05, + "loss": 0.4087, + "step": 17396, + "teacher_loss": 0.3966619074344635 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.274973064661026, + "learning_rate": 1.4759257863032522e-05, + "loss": 0.1866, + "step": 17397, + "teacher_loss": 0.1767333447933197 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.75236576795578, + "learning_rate": 1.475698690859093e-05, + "loss": 0.2922, + "step": 17398, + "teacher_loss": 0.24108746647834778 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.41506046056747437, + "learning_rate": 1.4754715959720897e-05, + "loss": 0.1797, + "step": 17399, + "teacher_loss": 0.15354523062705994 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.4944053888320923, + "learning_rate": 1.4752445016474489e-05, + "loss": 0.2465, + "step": 17400, + "teacher_loss": 0.21899157762527466 + }, + { + "compression_loss": 0.0, + "epoch": 3.14, + "label_loss": 0.3569111227989197, + "learning_rate": 1.4750174078903776e-05, + "loss": 0.213, + "step": 17401, + "teacher_loss": 0.1970204859972 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.5005359649658203, + "learning_rate": 1.4747903147060823e-05, + "loss": 0.2421, + "step": 17402, + "teacher_loss": 0.2134018838405609 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.5222065448760986, + "learning_rate": 1.4745632220997692e-05, + "loss": 0.2522, + "step": 17403, + "teacher_loss": 0.22216098010540009 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.6102386713027954, + "learning_rate": 1.4743361300766455e-05, + "loss": 0.2705, + "step": 17404, + "teacher_loss": 0.23276478052139282 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.20437079668045044, + "learning_rate": 1.4741090386419173e-05, + "loss": 0.203, + "step": 17405, + "teacher_loss": 0.20280733704566956 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.31127116084098816, + "learning_rate": 1.4738819478007918e-05, + "loss": 0.2524, + "step": 17406, + "teacher_loss": 0.24581119418144226 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.32153379917144775, + "learning_rate": 1.4736548575584737e-05, + "loss": 0.1917, + "step": 17407, + "teacher_loss": 0.17727014422416687 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.40570133924484253, + "learning_rate": 1.4734277679201716e-05, + "loss": 0.215, + "step": 17408, + "teacher_loss": 0.19385957717895508 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.16944542527198792, + "learning_rate": 1.4732006788910912e-05, + "loss": 0.1599, + "step": 17409, + "teacher_loss": 0.15888236463069916 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.7532769441604614, + "learning_rate": 1.4729735904764383e-05, + "loss": 0.4933, + "step": 17410, + "teacher_loss": 0.4644352197647095 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.13085927069187164, + "learning_rate": 1.4727465026814204e-05, + "loss": 0.1981, + "step": 17411, + "teacher_loss": 0.20560771226882935 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.20340105891227722, + "learning_rate": 1.4725194155112432e-05, + "loss": 0.1799, + "step": 17412, + "teacher_loss": 0.17723365128040314 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.4465812146663666, + "learning_rate": 1.4722923289711133e-05, + "loss": 0.2701, + "step": 17413, + "teacher_loss": 0.25051164627075195 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.3256910443305969, + "learning_rate": 1.4720652430662375e-05, + "loss": 0.1804, + "step": 17414, + "teacher_loss": 0.1642308533191681 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.26035094261169434, + "learning_rate": 1.4718381578018218e-05, + "loss": 0.1924, + "step": 17415, + "teacher_loss": 0.18489119410514832 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.3798878788948059, + "learning_rate": 1.4716110731830727e-05, + "loss": 0.2, + "step": 17416, + "teacher_loss": 0.18005529046058655 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.548895001411438, + "learning_rate": 1.4713839892151968e-05, + "loss": 0.2037, + "step": 17417, + "teacher_loss": 0.16538025438785553 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.5879232883453369, + "learning_rate": 1.4711569059034e-05, + "loss": 0.4327, + "step": 17418, + "teacher_loss": 0.4154396951198578 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.16994859278202057, + "learning_rate": 1.4709298232528887e-05, + "loss": 0.1228, + "step": 17419, + "teacher_loss": 0.11754976958036423 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.6238703727722168, + "learning_rate": 1.4707027412688698e-05, + "loss": 0.22, + "step": 17420, + "teacher_loss": 0.1751675307750702 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.32937946915626526, + "learning_rate": 1.4704756599565494e-05, + "loss": 0.1813, + "step": 17421, + "teacher_loss": 0.1648046374320984 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.4378325939178467, + "learning_rate": 1.4702485793211328e-05, + "loss": 0.1926, + "step": 17422, + "teacher_loss": 0.16530011594295502 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.3265809714794159, + "learning_rate": 1.470021499367828e-05, + "loss": 0.1882, + "step": 17423, + "teacher_loss": 0.1728629171848297 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.5963238477706909, + "learning_rate": 1.4697944201018398e-05, + "loss": 0.2664, + "step": 17424, + "teacher_loss": 0.22971251606941223 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.38765954971313477, + "learning_rate": 1.4695673415283747e-05, + "loss": 0.2882, + "step": 17425, + "teacher_loss": 0.2770967483520508 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.29905790090560913, + "learning_rate": 1.4693402636526398e-05, + "loss": 0.2809, + "step": 17426, + "teacher_loss": 0.2788505554199219 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.30916622281074524, + "learning_rate": 1.4691131864798405e-05, + "loss": 0.2316, + "step": 17427, + "teacher_loss": 0.22302895784378052 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.9052622318267822, + "learning_rate": 1.4688861100151833e-05, + "loss": 0.3378, + "step": 17428, + "teacher_loss": 0.2747696042060852 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.527613639831543, + "learning_rate": 1.4686590342638739e-05, + "loss": 0.3534, + "step": 17429, + "teacher_loss": 0.33403724431991577 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.3878132700920105, + "learning_rate": 1.4684319592311193e-05, + "loss": 0.1779, + "step": 17430, + "teacher_loss": 0.15452352166175842 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.30898457765579224, + "learning_rate": 1.4682048849221258e-05, + "loss": 0.2685, + "step": 17431, + "teacher_loss": 0.26400619745254517 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.4095154106616974, + "learning_rate": 1.4679778113420976e-05, + "loss": 0.2569, + "step": 17432, + "teacher_loss": 0.2399575114250183 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.4363976716995239, + "learning_rate": 1.4677507384962433e-05, + "loss": 0.2646, + "step": 17433, + "teacher_loss": 0.24546974897384644 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.5808266997337341, + "learning_rate": 1.4675236663897675e-05, + "loss": 0.2284, + "step": 17434, + "teacher_loss": 0.18919017910957336 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.508952796459198, + "learning_rate": 1.4672965950278763e-05, + "loss": 0.2901, + "step": 17435, + "teacher_loss": 0.26581472158432007 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.4463152289390564, + "learning_rate": 1.4670695244157767e-05, + "loss": 0.2683, + "step": 17436, + "teacher_loss": 0.24847060441970825 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.8172603845596313, + "learning_rate": 1.466842454558674e-05, + "loss": 0.2527, + "step": 17437, + "teacher_loss": 0.19002526998519897 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.5451173186302185, + "learning_rate": 1.466615385461774e-05, + "loss": 0.2468, + "step": 17438, + "teacher_loss": 0.2136991024017334 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.37743890285491943, + "learning_rate": 1.4663883171302836e-05, + "loss": 0.2211, + "step": 17439, + "teacher_loss": 0.20369601249694824 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.4452858567237854, + "learning_rate": 1.4661612495694082e-05, + "loss": 0.1903, + "step": 17440, + "teacher_loss": 0.16195917129516602 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.5688092112541199, + "learning_rate": 1.4659341827843537e-05, + "loss": 0.272, + "step": 17441, + "teacher_loss": 0.23905879259109497 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.4404653310775757, + "learning_rate": 1.4657071167803266e-05, + "loss": 0.2191, + "step": 17442, + "teacher_loss": 0.1944507658481598 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.7133147716522217, + "learning_rate": 1.4654800515625322e-05, + "loss": 0.2844, + "step": 17443, + "teacher_loss": 0.2367367148399353 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.15977270901203156, + "learning_rate": 1.4652529871361766e-05, + "loss": 0.2054, + "step": 17444, + "teacher_loss": 0.2104422003030777 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.9102276563644409, + "learning_rate": 1.4650259235064662e-05, + "loss": 0.4153, + "step": 17445, + "teacher_loss": 0.360293447971344 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.8373477458953857, + "learning_rate": 1.4647988606786068e-05, + "loss": 0.2971, + "step": 17446, + "teacher_loss": 0.2370269000530243 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.316379189491272, + "learning_rate": 1.464571798657803e-05, + "loss": 0.1559, + "step": 17447, + "teacher_loss": 0.13809096813201904 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.5214146971702576, + "learning_rate": 1.4643447374492625e-05, + "loss": 0.3668, + "step": 17448, + "teacher_loss": 0.3495672941207886 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.4202248454093933, + "learning_rate": 1.46411767705819e-05, + "loss": 0.191, + "step": 17449, + "teacher_loss": 0.1654840111732483 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.4848083555698395, + "learning_rate": 1.4638906174897917e-05, + "loss": 0.2988, + "step": 17450, + "teacher_loss": 0.2780914306640625 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.8171603679656982, + "learning_rate": 1.4636635587492729e-05, + "loss": 0.2549, + "step": 17451, + "teacher_loss": 0.19246980547904968 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.38159215450286865, + "learning_rate": 1.46343650084184e-05, + "loss": 0.1923, + "step": 17452, + "teacher_loss": 0.17128071188926697 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.5327211618423462, + "learning_rate": 1.4632094437726988e-05, + "loss": 0.2124, + "step": 17453, + "teacher_loss": 0.176824152469635 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.1735510230064392, + "learning_rate": 1.462982387547054e-05, + "loss": 0.2063, + "step": 17454, + "teacher_loss": 0.20999057590961456 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.28142133355140686, + "learning_rate": 1.4627553321701128e-05, + "loss": 0.1695, + "step": 17455, + "teacher_loss": 0.15701445937156677 + }, + { + "compression_loss": 0.0, + "epoch": 3.15, + "label_loss": 0.20482710003852844, + "learning_rate": 1.4625282776470803e-05, + "loss": 0.1656, + "step": 17456, + "teacher_loss": 0.16128851473331451 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.19681161642074585, + "learning_rate": 1.462301223983161e-05, + "loss": 0.2101, + "step": 17457, + "teacher_loss": 0.21153885126113892 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.5013963580131531, + "learning_rate": 1.4620741711835627e-05, + "loss": 0.2437, + "step": 17458, + "teacher_loss": 0.21507784724235535 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.621864914894104, + "learning_rate": 1.4618471192534895e-05, + "loss": 0.5705, + "step": 17459, + "teacher_loss": 0.5648020505905151 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.2799007296562195, + "learning_rate": 1.4616200681981472e-05, + "loss": 0.216, + "step": 17460, + "teacher_loss": 0.20892545580863953 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.8843612670898438, + "learning_rate": 1.4613930180227421e-05, + "loss": 0.1911, + "step": 17461, + "teacher_loss": 0.11408155411481857 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.14197197556495667, + "learning_rate": 1.461165968732479e-05, + "loss": 0.1769, + "step": 17462, + "teacher_loss": 0.1807783544063568 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.31700652837753296, + "learning_rate": 1.460938920332564e-05, + "loss": 0.1736, + "step": 17463, + "teacher_loss": 0.15770840644836426 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.3048824071884155, + "learning_rate": 1.4607118728282025e-05, + "loss": 0.1667, + "step": 17464, + "teacher_loss": 0.15134331583976746 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.3354547917842865, + "learning_rate": 1.4604848262245999e-05, + "loss": 0.2603, + "step": 17465, + "teacher_loss": 0.2519644498825073 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.1901552826166153, + "learning_rate": 1.4602577805269615e-05, + "loss": 0.1651, + "step": 17466, + "teacher_loss": 0.16234713792800903 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.38609468936920166, + "learning_rate": 1.4600307357404933e-05, + "loss": 0.1979, + "step": 17467, + "teacher_loss": 0.1770011931657791 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.18647032976150513, + "learning_rate": 1.4598036918704006e-05, + "loss": 0.2291, + "step": 17468, + "teacher_loss": 0.23385076224803925 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.5232111215591431, + "learning_rate": 1.4595766489218892e-05, + "loss": 0.3357, + "step": 17469, + "teacher_loss": 0.3148711621761322 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.8115173578262329, + "learning_rate": 1.4593496069001629e-05, + "loss": 0.2884, + "step": 17470, + "teacher_loss": 0.23026251792907715 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.8417997360229492, + "learning_rate": 1.4591225658104292e-05, + "loss": 0.3887, + "step": 17471, + "teacher_loss": 0.3383626341819763 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.965733528137207, + "learning_rate": 1.4588955256578922e-05, + "loss": 0.3231, + "step": 17472, + "teacher_loss": 0.2516665756702423 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.3409142792224884, + "learning_rate": 1.4586684864477572e-05, + "loss": 0.3346, + "step": 17473, + "teacher_loss": 0.3339143991470337 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.3837571442127228, + "learning_rate": 1.45844144818523e-05, + "loss": 0.2486, + "step": 17474, + "teacher_loss": 0.23362372815608978 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.7640320062637329, + "learning_rate": 1.4582144108755162e-05, + "loss": 0.5399, + "step": 17475, + "teacher_loss": 0.5149547457695007 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.16650408506393433, + "learning_rate": 1.4579873745238202e-05, + "loss": 0.1921, + "step": 17476, + "teacher_loss": 0.1949939727783203 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.6649729013442993, + "learning_rate": 1.4577603391353481e-05, + "loss": 0.2945, + "step": 17477, + "teacher_loss": 0.2533126175403595 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.8058868646621704, + "learning_rate": 1.4575333047153046e-05, + "loss": 0.3139, + "step": 17478, + "teacher_loss": 0.2591917812824249 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.5767667889595032, + "learning_rate": 1.457306271268895e-05, + "loss": 0.3302, + "step": 17479, + "teacher_loss": 0.30282142758369446 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.4302701950073242, + "learning_rate": 1.457079238801325e-05, + "loss": 0.2949, + "step": 17480, + "teacher_loss": 0.2799092233181 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.2463553547859192, + "learning_rate": 1.4568522073177993e-05, + "loss": 0.1922, + "step": 17481, + "teacher_loss": 0.1862202286720276 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.7751959562301636, + "learning_rate": 1.4566251768235225e-05, + "loss": 0.281, + "step": 17482, + "teacher_loss": 0.22610455751419067 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.505419909954071, + "learning_rate": 1.456398147323701e-05, + "loss": 0.288, + "step": 17483, + "teacher_loss": 0.2638220191001892 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.6560220718383789, + "learning_rate": 1.4561711188235393e-05, + "loss": 0.2451, + "step": 17484, + "teacher_loss": 0.1994813233613968 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.4274773895740509, + "learning_rate": 1.4559440913282416e-05, + "loss": 0.2408, + "step": 17485, + "teacher_loss": 0.22002321481704712 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.39207181334495544, + "learning_rate": 1.4557170648430148e-05, + "loss": 0.2943, + "step": 17486, + "teacher_loss": 0.2834526300430298 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.5216990113258362, + "learning_rate": 1.4554900393730625e-05, + "loss": 0.2427, + "step": 17487, + "teacher_loss": 0.21164673566818237 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.6370932459831238, + "learning_rate": 1.45526301492359e-05, + "loss": 0.2455, + "step": 17488, + "teacher_loss": 0.20193777978420258 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.3933643698692322, + "learning_rate": 1.4550359914998027e-05, + "loss": 0.2547, + "step": 17489, + "teacher_loss": 0.2392462193965912 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.620413064956665, + "learning_rate": 1.4548089691069054e-05, + "loss": 0.3088, + "step": 17490, + "teacher_loss": 0.274222195148468 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 1.4505033493041992, + "learning_rate": 1.4545819477501028e-05, + "loss": 0.4155, + "step": 17491, + "teacher_loss": 0.3004941940307617 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.6978291869163513, + "learning_rate": 1.4543549274345997e-05, + "loss": 0.3205, + "step": 17492, + "teacher_loss": 0.2786010801792145 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.3535919487476349, + "learning_rate": 1.4541279081656016e-05, + "loss": 0.5062, + "step": 17493, + "teacher_loss": 0.5232028365135193 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.664519190788269, + "learning_rate": 1.4539008899483133e-05, + "loss": 0.288, + "step": 17494, + "teacher_loss": 0.2461758404970169 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.19022926688194275, + "learning_rate": 1.4536738727879384e-05, + "loss": 0.1568, + "step": 17495, + "teacher_loss": 0.15306758880615234 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.493743896484375, + "learning_rate": 1.4534468566896838e-05, + "loss": 0.2899, + "step": 17496, + "teacher_loss": 0.2673032879829407 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.2214827686548233, + "learning_rate": 1.4532198416587528e-05, + "loss": 0.1983, + "step": 17497, + "teacher_loss": 0.19575023651123047 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.4366607069969177, + "learning_rate": 1.4529928277003502e-05, + "loss": 0.2351, + "step": 17498, + "teacher_loss": 0.2126588672399521 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.5405682921409607, + "learning_rate": 1.4527658148196815e-05, + "loss": 0.2872, + "step": 17499, + "teacher_loss": 0.25903475284576416 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.2243812382221222, + "learning_rate": 1.4525388030219512e-05, + "loss": 0.2279, + "step": 17500, + "teacher_loss": 0.22830086946487427 + }, + { + "epoch": 3.16, + "eval_exact_match": 79.46073793755913, + "eval_f1": 87.098412229066, + "step": 17500 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.3075556755065918, + "learning_rate": 1.4523117923123633e-05, + "loss": 0.2043, + "step": 17501, + "teacher_loss": 0.19281215965747833 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.2748276889324188, + "learning_rate": 1.4520847826961235e-05, + "loss": 0.244, + "step": 17502, + "teacher_loss": 0.24052734673023224 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.2654651403427124, + "learning_rate": 1.4518577741784359e-05, + "loss": 0.2046, + "step": 17503, + "teacher_loss": 0.19788596034049988 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 1.288184404373169, + "learning_rate": 1.4516307667645048e-05, + "loss": 0.6536, + "step": 17504, + "teacher_loss": 0.583102822303772 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.7251483798027039, + "learning_rate": 1.4514037604595356e-05, + "loss": 0.2568, + "step": 17505, + "teacher_loss": 0.20476871728897095 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.8096426725387573, + "learning_rate": 1.451176755268733e-05, + "loss": 0.3424, + "step": 17506, + "teacher_loss": 0.2904762029647827 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.3818186819553375, + "learning_rate": 1.4509497511972997e-05, + "loss": 0.2263, + "step": 17507, + "teacher_loss": 0.20907238125801086 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.26641252636909485, + "learning_rate": 1.4507227482504427e-05, + "loss": 0.2059, + "step": 17508, + "teacher_loss": 0.19913128018379211 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.39385098218917847, + "learning_rate": 1.450495746433365e-05, + "loss": 0.2183, + "step": 17509, + "teacher_loss": 0.19881710410118103 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.5063592195510864, + "learning_rate": 1.4502687457512714e-05, + "loss": 0.2333, + "step": 17510, + "teacher_loss": 0.2030005156993866 + }, + { + "compression_loss": 0.0, + "epoch": 3.16, + "label_loss": 0.2908146381378174, + "learning_rate": 1.4500417462093662e-05, + "loss": 0.2455, + "step": 17511, + "teacher_loss": 0.24045629799365997 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.3965708613395691, + "learning_rate": 1.449814747812854e-05, + "loss": 0.1726, + "step": 17512, + "teacher_loss": 0.14776010811328888 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.1032509133219719, + "learning_rate": 1.4495877505669393e-05, + "loss": 0.1683, + "step": 17513, + "teacher_loss": 0.1755698323249817 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.3869970142841339, + "learning_rate": 1.449360754476826e-05, + "loss": 0.352, + "step": 17514, + "teacher_loss": 0.3481558561325073 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.3167208433151245, + "learning_rate": 1.4491337595477192e-05, + "loss": 0.1842, + "step": 17515, + "teacher_loss": 0.16949915885925293 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.3707008361816406, + "learning_rate": 1.4489067657848226e-05, + "loss": 0.2464, + "step": 17516, + "teacher_loss": 0.23258526623249054 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.40697360038757324, + "learning_rate": 1.4486797731933404e-05, + "loss": 0.2671, + "step": 17517, + "teacher_loss": 0.251510888338089 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.6458220481872559, + "learning_rate": 1.4484527817784776e-05, + "loss": 0.2548, + "step": 17518, + "teacher_loss": 0.211335688829422 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.5201655626296997, + "learning_rate": 1.4482257915454383e-05, + "loss": 0.284, + "step": 17519, + "teacher_loss": 0.2577097415924072 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.9687808156013489, + "learning_rate": 1.4479988024994252e-05, + "loss": 0.3117, + "step": 17520, + "teacher_loss": 0.23872385919094086 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.2458811104297638, + "learning_rate": 1.4477718146456446e-05, + "loss": 0.2148, + "step": 17521, + "teacher_loss": 0.21139711141586304 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.6077238321304321, + "learning_rate": 1.4475448279892996e-05, + "loss": 0.2542, + "step": 17522, + "teacher_loss": 0.21488723158836365 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.5509481430053711, + "learning_rate": 1.447317842535594e-05, + "loss": 0.3242, + "step": 17523, + "teacher_loss": 0.299039751291275 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.15414343774318695, + "learning_rate": 1.4470908582897328e-05, + "loss": 0.1858, + "step": 17524, + "teacher_loss": 0.1893196403980255 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 1.639272689819336, + "learning_rate": 1.4468638752569193e-05, + "loss": 0.4238, + "step": 17525, + "teacher_loss": 0.2887035608291626 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.36097022891044617, + "learning_rate": 1.4466368934423578e-05, + "loss": 0.1981, + "step": 17526, + "teacher_loss": 0.17999424040317535 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.5599234104156494, + "learning_rate": 1.4464099128512525e-05, + "loss": 0.1892, + "step": 17527, + "teacher_loss": 0.14800825715065002 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.49424654245376587, + "learning_rate": 1.4461829334888073e-05, + "loss": 0.3032, + "step": 17528, + "teacher_loss": 0.28192079067230225 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.5125178694725037, + "learning_rate": 1.4459559553602256e-05, + "loss": 0.2355, + "step": 17529, + "teacher_loss": 0.20472539961338043 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.7114384174346924, + "learning_rate": 1.4457289784707122e-05, + "loss": 0.2821, + "step": 17530, + "teacher_loss": 0.23443858325481415 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.33134832978248596, + "learning_rate": 1.445502002825471e-05, + "loss": 0.2293, + "step": 17531, + "teacher_loss": 0.2179253101348877 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.5935012698173523, + "learning_rate": 1.4452750284297052e-05, + "loss": 0.356, + "step": 17532, + "teacher_loss": 0.3296297490596771 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.30534982681274414, + "learning_rate": 1.4450480552886184e-05, + "loss": 0.3293, + "step": 17533, + "teacher_loss": 0.3319437503814697 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.9275591373443604, + "learning_rate": 1.4448210834074155e-05, + "loss": 0.5499, + "step": 17534, + "teacher_loss": 0.507900595664978 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.30951058864593506, + "learning_rate": 1.4445941127912995e-05, + "loss": 0.2661, + "step": 17535, + "teacher_loss": 0.2612661123275757 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.6395387649536133, + "learning_rate": 1.4443671434454743e-05, + "loss": 0.2865, + "step": 17536, + "teacher_loss": 0.24729005992412567 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.22077274322509766, + "learning_rate": 1.4441401753751438e-05, + "loss": 0.2074, + "step": 17537, + "teacher_loss": 0.20587016642093658 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.35192614793777466, + "learning_rate": 1.4439132085855117e-05, + "loss": 0.178, + "step": 17538, + "teacher_loss": 0.15865057706832886 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.7866694331169128, + "learning_rate": 1.4436862430817813e-05, + "loss": 0.2815, + "step": 17539, + "teacher_loss": 0.22539003193378448 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.23134806752204895, + "learning_rate": 1.4434592788691568e-05, + "loss": 0.1736, + "step": 17540, + "teacher_loss": 0.16715869307518005 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.6995489597320557, + "learning_rate": 1.4432323159528415e-05, + "loss": 0.2038, + "step": 17541, + "teacher_loss": 0.14872127771377563 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.5319263935089111, + "learning_rate": 1.4430053543380388e-05, + "loss": 0.3348, + "step": 17542, + "teacher_loss": 0.3129148781299591 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.7178807258605957, + "learning_rate": 1.4427783940299526e-05, + "loss": 0.3609, + "step": 17543, + "teacher_loss": 0.32126104831695557 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.8869133591651917, + "learning_rate": 1.4425514350337866e-05, + "loss": 0.2862, + "step": 17544, + "teacher_loss": 0.21946462988853455 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.31641507148742676, + "learning_rate": 1.442324477354743e-05, + "loss": 0.1934, + "step": 17545, + "teacher_loss": 0.17970064282417297 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.6466572284698486, + "learning_rate": 1.4420975209980272e-05, + "loss": 0.2663, + "step": 17546, + "teacher_loss": 0.22409173846244812 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.282548725605011, + "learning_rate": 1.4418705659688414e-05, + "loss": 0.1863, + "step": 17547, + "teacher_loss": 0.17564553022384644 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.3805087208747864, + "learning_rate": 1.4416436122723888e-05, + "loss": 0.187, + "step": 17548, + "teacher_loss": 0.1654852032661438 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.20888382196426392, + "learning_rate": 1.4414166599138737e-05, + "loss": 0.1874, + "step": 17549, + "teacher_loss": 0.18498674035072327 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.7838221788406372, + "learning_rate": 1.4411897088984987e-05, + "loss": 0.4258, + "step": 17550, + "teacher_loss": 0.38604599237442017 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.614762008190155, + "learning_rate": 1.440962759231467e-05, + "loss": 0.2088, + "step": 17551, + "teacher_loss": 0.1636539101600647 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.3760119676589966, + "learning_rate": 1.4407358109179827e-05, + "loss": 0.2224, + "step": 17552, + "teacher_loss": 0.20533594489097595 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.30171364545822144, + "learning_rate": 1.4405088639632486e-05, + "loss": 0.2676, + "step": 17553, + "teacher_loss": 0.2638113498687744 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.3653731942176819, + "learning_rate": 1.4402819183724683e-05, + "loss": 0.3816, + "step": 17554, + "teacher_loss": 0.3833754062652588 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.9574127197265625, + "learning_rate": 1.4400549741508434e-05, + "loss": 0.3138, + "step": 17555, + "teacher_loss": 0.24224044382572174 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.466407835483551, + "learning_rate": 1.4398280313035794e-05, + "loss": 0.237, + "step": 17556, + "teacher_loss": 0.2115379273891449 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.3493289649486542, + "learning_rate": 1.4396010898358778e-05, + "loss": 0.2048, + "step": 17557, + "teacher_loss": 0.188791424036026 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.27235856652259827, + "learning_rate": 1.4393741497529416e-05, + "loss": 0.2044, + "step": 17558, + "teacher_loss": 0.19688169658184052 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.2390129715204239, + "learning_rate": 1.439147211059975e-05, + "loss": 0.1695, + "step": 17559, + "teacher_loss": 0.1617770791053772 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.3681508004665375, + "learning_rate": 1.4389202737621801e-05, + "loss": 0.1585, + "step": 17560, + "teacher_loss": 0.1352112889289856 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.5449030995368958, + "learning_rate": 1.4386933378647602e-05, + "loss": 0.3457, + "step": 17561, + "teacher_loss": 0.3235843777656555 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.5737286806106567, + "learning_rate": 1.4384664033729185e-05, + "loss": 0.2354, + "step": 17562, + "teacher_loss": 0.1977914422750473 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.26882851123809814, + "learning_rate": 1.4382394702918575e-05, + "loss": 0.2597, + "step": 17563, + "teacher_loss": 0.2586757242679596 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.5868334174156189, + "learning_rate": 1.43801253862678e-05, + "loss": 0.2298, + "step": 17564, + "teacher_loss": 0.19010649621486664 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.3118050694465637, + "learning_rate": 1.4377856083828897e-05, + "loss": 0.1804, + "step": 17565, + "teacher_loss": 0.16577717661857605 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.7272975444793701, + "learning_rate": 1.4375586795653887e-05, + "loss": 0.3331, + "step": 17566, + "teacher_loss": 0.2893384099006653 + }, + { + "compression_loss": 0.0, + "epoch": 3.17, + "label_loss": 0.18259891867637634, + "learning_rate": 1.4373317521794796e-05, + "loss": 0.2626, + "step": 17567, + "teacher_loss": 0.27145156264305115 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.40485966205596924, + "learning_rate": 1.437104826230366e-05, + "loss": 0.1812, + "step": 17568, + "teacher_loss": 0.1563625931739807 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.36247414350509644, + "learning_rate": 1.4368779017232504e-05, + "loss": 0.2012, + "step": 17569, + "teacher_loss": 0.18328312039375305 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.3005625307559967, + "learning_rate": 1.4366509786633342e-05, + "loss": 0.2165, + "step": 17570, + "teacher_loss": 0.20714882016181946 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.547737717628479, + "learning_rate": 1.4364240570558224e-05, + "loss": 0.3317, + "step": 17571, + "teacher_loss": 0.3076656758785248 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.5777987241744995, + "learning_rate": 1.4361971369059158e-05, + "loss": 0.2132, + "step": 17572, + "teacher_loss": 0.17271798849105835 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.2663493752479553, + "learning_rate": 1.4359702182188176e-05, + "loss": 0.1766, + "step": 17573, + "teacher_loss": 0.16660578548908234 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.4218781590461731, + "learning_rate": 1.4357433009997299e-05, + "loss": 0.2216, + "step": 17574, + "teacher_loss": 0.1993720978498459 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.21224823594093323, + "learning_rate": 1.435516385253856e-05, + "loss": 0.2113, + "step": 17575, + "teacher_loss": 0.21118026971817017 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.21006067097187042, + "learning_rate": 1.4352894709863983e-05, + "loss": 0.1483, + "step": 17576, + "teacher_loss": 0.14138223230838776 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.48695528507232666, + "learning_rate": 1.4350625582025584e-05, + "loss": 0.2328, + "step": 17577, + "teacher_loss": 0.2045508623123169 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.6032968759536743, + "learning_rate": 1.4348356469075399e-05, + "loss": 0.2474, + "step": 17578, + "teacher_loss": 0.20787540078163147 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.5241185426712036, + "learning_rate": 1.4346087371065448e-05, + "loss": 0.2145, + "step": 17579, + "teacher_loss": 0.1801140457391739 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.4550755023956299, + "learning_rate": 1.4343818288047744e-05, + "loss": 0.2798, + "step": 17580, + "teacher_loss": 0.2603676915168762 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.49509015679359436, + "learning_rate": 1.4341549220074329e-05, + "loss": 0.2559, + "step": 17581, + "teacher_loss": 0.2293020635843277 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.5839067697525024, + "learning_rate": 1.4339280167197213e-05, + "loss": 0.3084, + "step": 17582, + "teacher_loss": 0.27779239416122437 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.7676784992218018, + "learning_rate": 1.4337011129468419e-05, + "loss": 0.4795, + "step": 17583, + "teacher_loss": 0.44750893115997314 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.6214165687561035, + "learning_rate": 1.4334742106939975e-05, + "loss": 0.238, + "step": 17584, + "teacher_loss": 0.19544601440429688 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.5743802785873413, + "learning_rate": 1.43324730996639e-05, + "loss": 0.4628, + "step": 17585, + "teacher_loss": 0.45037519931793213 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.19376444816589355, + "learning_rate": 1.4330204107692212e-05, + "loss": 0.1461, + "step": 17586, + "teacher_loss": 0.14078941941261292 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.31987154483795166, + "learning_rate": 1.4327935131076939e-05, + "loss": 0.2716, + "step": 17587, + "teacher_loss": 0.2662258744239807 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 1.3952951431274414, + "learning_rate": 1.4325666169870099e-05, + "loss": 0.6551, + "step": 17588, + "teacher_loss": 0.572895884513855 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.5303844213485718, + "learning_rate": 1.4323397224123708e-05, + "loss": 0.277, + "step": 17589, + "teacher_loss": 0.24882817268371582 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.39242875576019287, + "learning_rate": 1.4321128293889793e-05, + "loss": 0.2433, + "step": 17590, + "teacher_loss": 0.22669702768325806 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.3252655267715454, + "learning_rate": 1.4318859379220371e-05, + "loss": 0.2031, + "step": 17591, + "teacher_loss": 0.18953226506710052 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.7132079601287842, + "learning_rate": 1.4316590480167459e-05, + "loss": 0.3375, + "step": 17592, + "teacher_loss": 0.29573991894721985 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.47425180673599243, + "learning_rate": 1.431432159678308e-05, + "loss": 0.3001, + "step": 17593, + "teacher_loss": 0.28070133924484253 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.8338819742202759, + "learning_rate": 1.4312052729119256e-05, + "loss": 0.7873, + "step": 17594, + "teacher_loss": 0.782118558883667 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.9233233332633972, + "learning_rate": 1.4309783877227996e-05, + "loss": 0.2628, + "step": 17595, + "teacher_loss": 0.18941757082939148 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.25967979431152344, + "learning_rate": 1.430751504116132e-05, + "loss": 0.2334, + "step": 17596, + "teacher_loss": 0.23048532009124756 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.554546058177948, + "learning_rate": 1.430524622097125e-05, + "loss": 0.2532, + "step": 17597, + "teacher_loss": 0.2197268158197403 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.33481258153915405, + "learning_rate": 1.4302977416709802e-05, + "loss": 0.2784, + "step": 17598, + "teacher_loss": 0.27212274074554443 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.3703487515449524, + "learning_rate": 1.4300708628428989e-05, + "loss": 0.1704, + "step": 17599, + "teacher_loss": 0.14821605384349823 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.10718933492898941, + "learning_rate": 1.4298439856180832e-05, + "loss": 0.1775, + "step": 17600, + "teacher_loss": 0.1852729320526123 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.5222270488739014, + "learning_rate": 1.4296171100017348e-05, + "loss": 0.2145, + "step": 17601, + "teacher_loss": 0.18033930659294128 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.8295646905899048, + "learning_rate": 1.4293902359990547e-05, + "loss": 0.2802, + "step": 17602, + "teacher_loss": 0.21916356682777405 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.34876590967178345, + "learning_rate": 1.429163363615245e-05, + "loss": 0.1961, + "step": 17603, + "teacher_loss": 0.17908209562301636 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.6965045928955078, + "learning_rate": 1.4289364928555073e-05, + "loss": 0.279, + "step": 17604, + "teacher_loss": 0.23265808820724487 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.2956174314022064, + "learning_rate": 1.428709623725042e-05, + "loss": 0.2945, + "step": 17605, + "teacher_loss": 0.29439499974250793 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 1.3945338726043701, + "learning_rate": 1.4284827562290521e-05, + "loss": 0.4184, + "step": 17606, + "teacher_loss": 0.30990782380104065 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.2730444371700287, + "learning_rate": 1.428255890372738e-05, + "loss": 0.1876, + "step": 17607, + "teacher_loss": 0.17813752591609955 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.07789207249879837, + "learning_rate": 1.4280290261613002e-05, + "loss": 0.1384, + "step": 17608, + "teacher_loss": 0.14511734247207642 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.17575177550315857, + "learning_rate": 1.4278021635999424e-05, + "loss": 0.1828, + "step": 17609, + "teacher_loss": 0.18362730741500854 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.4916270971298218, + "learning_rate": 1.427575302693864e-05, + "loss": 0.2387, + "step": 17610, + "teacher_loss": 0.210578054189682 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.9475731253623962, + "learning_rate": 1.4273484434482665e-05, + "loss": 0.3271, + "step": 17611, + "teacher_loss": 0.25814807415008545 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.19031646847724915, + "learning_rate": 1.4271215858683519e-05, + "loss": 0.185, + "step": 17612, + "teacher_loss": 0.18441042304039001 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.3296540379524231, + "learning_rate": 1.4268947299593206e-05, + "loss": 0.2503, + "step": 17613, + "teacher_loss": 0.2414407730102539 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.5212024450302124, + "learning_rate": 1.4266678757263743e-05, + "loss": 0.2743, + "step": 17614, + "teacher_loss": 0.24687375128269196 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.2577836811542511, + "learning_rate": 1.4264410231747128e-05, + "loss": 0.182, + "step": 17615, + "teacher_loss": 0.17355065047740936 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.4391128718852997, + "learning_rate": 1.4262141723095387e-05, + "loss": 0.248, + "step": 17616, + "teacher_loss": 0.22681915760040283 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.4718478322029114, + "learning_rate": 1.4259873231360527e-05, + "loss": 0.1972, + "step": 17617, + "teacher_loss": 0.1667015552520752 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.15968924760818481, + "learning_rate": 1.4257604756594548e-05, + "loss": 0.1436, + "step": 17618, + "teacher_loss": 0.1418251395225525 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.6067183017730713, + "learning_rate": 1.4255336298849474e-05, + "loss": 0.3592, + "step": 17619, + "teacher_loss": 0.3316514194011688 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.3493698239326477, + "learning_rate": 1.4253067858177301e-05, + "loss": 0.1859, + "step": 17620, + "teacher_loss": 0.16778475046157837 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 1.3777087926864624, + "learning_rate": 1.4250799434630042e-05, + "loss": 0.3961, + "step": 17621, + "teacher_loss": 0.28703343868255615 + }, + { + "compression_loss": 0.0, + "epoch": 3.18, + "label_loss": 0.37302398681640625, + "learning_rate": 1.4248531028259708e-05, + "loss": 0.1679, + "step": 17622, + "teacher_loss": 0.14505496621131897 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.4962156116962433, + "learning_rate": 1.4246262639118304e-05, + "loss": 0.2602, + "step": 17623, + "teacher_loss": 0.23398357629776 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.4226142168045044, + "learning_rate": 1.4243994267257836e-05, + "loss": 0.2567, + "step": 17624, + "teacher_loss": 0.2382708191871643 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.3812740743160248, + "learning_rate": 1.4241725912730315e-05, + "loss": 0.2138, + "step": 17625, + "teacher_loss": 0.19524195790290833 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.5385949015617371, + "learning_rate": 1.4239457575587747e-05, + "loss": 0.2702, + "step": 17626, + "teacher_loss": 0.24038901925086975 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.3025643229484558, + "learning_rate": 1.4237189255882131e-05, + "loss": 0.2356, + "step": 17627, + "teacher_loss": 0.22810712456703186 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.21577215194702148, + "learning_rate": 1.4234920953665482e-05, + "loss": 0.212, + "step": 17628, + "teacher_loss": 0.21161997318267822 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.37343868613243103, + "learning_rate": 1.4232652668989805e-05, + "loss": 0.2554, + "step": 17629, + "teacher_loss": 0.24230524897575378 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.632673978805542, + "learning_rate": 1.4230384401907092e-05, + "loss": 0.3659, + "step": 17630, + "teacher_loss": 0.33627569675445557 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.7718628644943237, + "learning_rate": 1.4228116152469368e-05, + "loss": 0.2987, + "step": 17631, + "teacher_loss": 0.24612393975257874 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.3600156903266907, + "learning_rate": 1.4225847920728622e-05, + "loss": 0.2183, + "step": 17632, + "teacher_loss": 0.20254400372505188 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.45248568058013916, + "learning_rate": 1.4223579706736857e-05, + "loss": 0.3043, + "step": 17633, + "teacher_loss": 0.28782743215560913 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.33973008394241333, + "learning_rate": 1.4221311510546089e-05, + "loss": 0.1713, + "step": 17634, + "teacher_loss": 0.15261170268058777 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.5740082263946533, + "learning_rate": 1.4219043332208312e-05, + "loss": 0.3057, + "step": 17635, + "teacher_loss": 0.27593904733657837 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.6180772185325623, + "learning_rate": 1.4216775171775531e-05, + "loss": 0.3007, + "step": 17636, + "teacher_loss": 0.26542431116104126 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.4716870188713074, + "learning_rate": 1.421450702929974e-05, + "loss": 0.1933, + "step": 17637, + "teacher_loss": 0.1623629480600357 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.5338704586029053, + "learning_rate": 1.4212238904832956e-05, + "loss": 0.1825, + "step": 17638, + "teacher_loss": 0.14342273771762848 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.5109022855758667, + "learning_rate": 1.4209970798427167e-05, + "loss": 0.309, + "step": 17639, + "teacher_loss": 0.2865491807460785 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.7189458608627319, + "learning_rate": 1.420770271013438e-05, + "loss": 0.3553, + "step": 17640, + "teacher_loss": 0.31493479013442993 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.44520998001098633, + "learning_rate": 1.4205434640006595e-05, + "loss": 0.3036, + "step": 17641, + "teacher_loss": 0.2878129482269287 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.31332480907440186, + "learning_rate": 1.4203166588095816e-05, + "loss": 0.2407, + "step": 17642, + "teacher_loss": 0.23264554142951965 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.8516744375228882, + "learning_rate": 1.4200898554454028e-05, + "loss": 0.2043, + "step": 17643, + "teacher_loss": 0.1324230283498764 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.5234546661376953, + "learning_rate": 1.4198630539133249e-05, + "loss": 0.2783, + "step": 17644, + "teacher_loss": 0.2510383725166321 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 1.0129578113555908, + "learning_rate": 1.4196362542185469e-05, + "loss": 0.2908, + "step": 17645, + "teacher_loss": 0.2105954885482788 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.41638368368148804, + "learning_rate": 1.419409456366268e-05, + "loss": 0.1926, + "step": 17646, + "teacher_loss": 0.16770394146442413 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.3812369108200073, + "learning_rate": 1.4191826603616891e-05, + "loss": 0.2008, + "step": 17647, + "teacher_loss": 0.1807435303926468 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.4451167583465576, + "learning_rate": 1.4189558662100094e-05, + "loss": 0.1753, + "step": 17648, + "teacher_loss": 0.14527684450149536 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.3233838677406311, + "learning_rate": 1.4187290739164285e-05, + "loss": 0.1695, + "step": 17649, + "teacher_loss": 0.15235596895217896 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 1.3505933284759521, + "learning_rate": 1.4185022834861466e-05, + "loss": 0.3868, + "step": 17650, + "teacher_loss": 0.2797505259513855 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.3997369408607483, + "learning_rate": 1.4182754949243629e-05, + "loss": 0.2755, + "step": 17651, + "teacher_loss": 0.26170945167541504 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.3941057324409485, + "learning_rate": 1.4180487082362767e-05, + "loss": 0.2674, + "step": 17652, + "teacher_loss": 0.2533155679702759 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.14867646992206573, + "learning_rate": 1.4178219234270885e-05, + "loss": 0.1983, + "step": 17653, + "teacher_loss": 0.20384220778942108 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.26068776845932007, + "learning_rate": 1.4175951405019973e-05, + "loss": 0.1861, + "step": 17654, + "teacher_loss": 0.17776955664157867 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.3619043231010437, + "learning_rate": 1.4173683594662016e-05, + "loss": 0.1683, + "step": 17655, + "teacher_loss": 0.14680679142475128 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.3401820659637451, + "learning_rate": 1.4171415803249027e-05, + "loss": 0.17, + "step": 17656, + "teacher_loss": 0.1510607898235321 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.497144877910614, + "learning_rate": 1.4169148030832984e-05, + "loss": 0.1915, + "step": 17657, + "teacher_loss": 0.15756356716156006 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.5746393203735352, + "learning_rate": 1.4166880277465888e-05, + "loss": 0.5828, + "step": 17658, + "teacher_loss": 0.5836811661720276 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.3819441497325897, + "learning_rate": 1.4164612543199725e-05, + "loss": 0.176, + "step": 17659, + "teacher_loss": 0.1530633270740509 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.24425393342971802, + "learning_rate": 1.4162344828086496e-05, + "loss": 0.2048, + "step": 17660, + "teacher_loss": 0.20042669773101807 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.5154309868812561, + "learning_rate": 1.4160077132178188e-05, + "loss": 0.2846, + "step": 17661, + "teacher_loss": 0.25900042057037354 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.49648517370224, + "learning_rate": 1.4157809455526789e-05, + "loss": 0.2329, + "step": 17662, + "teacher_loss": 0.20363697409629822 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.7446349263191223, + "learning_rate": 1.4155541798184299e-05, + "loss": 0.2716, + "step": 17663, + "teacher_loss": 0.2190054953098297 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.27723339200019836, + "learning_rate": 1.4153274160202702e-05, + "loss": 0.1999, + "step": 17664, + "teacher_loss": 0.19125187397003174 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.6717166900634766, + "learning_rate": 1.4151006541633989e-05, + "loss": 0.4072, + "step": 17665, + "teacher_loss": 0.3778409957885742 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.3500524163246155, + "learning_rate": 1.4148738942530152e-05, + "loss": 0.3734, + "step": 17666, + "teacher_loss": 0.37594538927078247 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.5047045350074768, + "learning_rate": 1.4146471362943182e-05, + "loss": 0.223, + "step": 17667, + "teacher_loss": 0.19164644181728363 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.4416535496711731, + "learning_rate": 1.4144203802925054e-05, + "loss": 0.2412, + "step": 17668, + "teacher_loss": 0.21894004940986633 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.6555887460708618, + "learning_rate": 1.4141936262527778e-05, + "loss": 0.2562, + "step": 17669, + "teacher_loss": 0.21176809072494507 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.3796350359916687, + "learning_rate": 1.4139668741803329e-05, + "loss": 0.1752, + "step": 17670, + "teacher_loss": 0.15246069431304932 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.5755200982093811, + "learning_rate": 1.4137401240803692e-05, + "loss": 0.3264, + "step": 17671, + "teacher_loss": 0.2986907958984375 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.301070898771286, + "learning_rate": 1.4135133759580861e-05, + "loss": 0.176, + "step": 17672, + "teacher_loss": 0.16208702325820923 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.9147008657455444, + "learning_rate": 1.4132866298186821e-05, + "loss": 0.3147, + "step": 17673, + "teacher_loss": 0.24803461134433746 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.4807908535003662, + "learning_rate": 1.4130598856673552e-05, + "loss": 0.2223, + "step": 17674, + "teacher_loss": 0.1935236006975174 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.883996844291687, + "learning_rate": 1.4128331435093049e-05, + "loss": 0.3523, + "step": 17675, + "teacher_loss": 0.2932104170322418 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.2951236963272095, + "learning_rate": 1.4126064033497293e-05, + "loss": 0.213, + "step": 17676, + "teacher_loss": 0.20385511219501495 + }, + { + "compression_loss": 0.0, + "epoch": 3.19, + "label_loss": 0.6488451957702637, + "learning_rate": 1.412379665193827e-05, + "loss": 0.3624, + "step": 17677, + "teacher_loss": 0.3306090533733368 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.38633355498313904, + "learning_rate": 1.4121529290467956e-05, + "loss": 0.1792, + "step": 17678, + "teacher_loss": 0.15614831447601318 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.30094248056411743, + "learning_rate": 1.411926194913835e-05, + "loss": 0.1903, + "step": 17679, + "teacher_loss": 0.17803940176963806 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.16600218415260315, + "learning_rate": 1.4116994628001424e-05, + "loss": 0.2045, + "step": 17680, + "teacher_loss": 0.20878452062606812 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.19084596633911133, + "learning_rate": 1.411472732710916e-05, + "loss": 0.2012, + "step": 17681, + "teacher_loss": 0.2023848295211792 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.20804446935653687, + "learning_rate": 1.4112460046513547e-05, + "loss": 0.2054, + "step": 17682, + "teacher_loss": 0.20508253574371338 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.2727295756340027, + "learning_rate": 1.4110192786266564e-05, + "loss": 0.2159, + "step": 17683, + "teacher_loss": 0.20959827303886414 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 1.2126994132995605, + "learning_rate": 1.4107925546420191e-05, + "loss": 0.4428, + "step": 17684, + "teacher_loss": 0.35727572441101074 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.6570178270339966, + "learning_rate": 1.4105658327026413e-05, + "loss": 0.2816, + "step": 17685, + "teacher_loss": 0.23986339569091797 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.18651720881462097, + "learning_rate": 1.4103391128137208e-05, + "loss": 0.1791, + "step": 17686, + "teacher_loss": 0.1782565861940384 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.8153842687606812, + "learning_rate": 1.4101123949804553e-05, + "loss": 0.4564, + "step": 17687, + "teacher_loss": 0.4164618253707886 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.26899033784866333, + "learning_rate": 1.4098856792080434e-05, + "loss": 0.229, + "step": 17688, + "teacher_loss": 0.22460463643074036 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.17461831867694855, + "learning_rate": 1.4096589655016827e-05, + "loss": 0.206, + "step": 17689, + "teacher_loss": 0.20951685309410095 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.23320603370666504, + "learning_rate": 1.4094322538665707e-05, + "loss": 0.1781, + "step": 17690, + "teacher_loss": 0.17200851440429688 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.47878435254096985, + "learning_rate": 1.4092055443079059e-05, + "loss": 0.2868, + "step": 17691, + "teacher_loss": 0.265491783618927 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.4098367691040039, + "learning_rate": 1.4089788368308862e-05, + "loss": 0.2362, + "step": 17692, + "teacher_loss": 0.21688339114189148 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.4018549621105194, + "learning_rate": 1.4087521314407081e-05, + "loss": 0.2049, + "step": 17693, + "teacher_loss": 0.18304800987243652 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.400320827960968, + "learning_rate": 1.4085254281425707e-05, + "loss": 0.2331, + "step": 17694, + "teacher_loss": 0.21457022428512573 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.8185330033302307, + "learning_rate": 1.4082987269416708e-05, + "loss": 0.378, + "step": 17695, + "teacher_loss": 0.32909512519836426 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.6196879148483276, + "learning_rate": 1.4080720278432056e-05, + "loss": 0.2412, + "step": 17696, + "teacher_loss": 0.19913756847381592 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.5211464762687683, + "learning_rate": 1.4078453308523737e-05, + "loss": 0.2043, + "step": 17697, + "teacher_loss": 0.16904941201210022 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.497321218252182, + "learning_rate": 1.4076186359743721e-05, + "loss": 0.2597, + "step": 17698, + "teacher_loss": 0.2333253026008606 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.397977352142334, + "learning_rate": 1.4073919432143981e-05, + "loss": 0.1895, + "step": 17699, + "teacher_loss": 0.16636788845062256 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.4134765863418579, + "learning_rate": 1.4071652525776488e-05, + "loss": 0.2006, + "step": 17700, + "teacher_loss": 0.1769149899482727 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.4349774718284607, + "learning_rate": 1.4069385640693226e-05, + "loss": 0.2345, + "step": 17701, + "teacher_loss": 0.21224355697631836 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.30599090456962585, + "learning_rate": 1.406711877694616e-05, + "loss": 0.2445, + "step": 17702, + "teacher_loss": 0.23763877153396606 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.37175849080085754, + "learning_rate": 1.4064851934587256e-05, + "loss": 0.2657, + "step": 17703, + "teacher_loss": 0.2539531886577606 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.4812345504760742, + "learning_rate": 1.4062585113668503e-05, + "loss": 0.258, + "step": 17704, + "teacher_loss": 0.23319700360298157 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 1.0152655839920044, + "learning_rate": 1.4060318314241858e-05, + "loss": 0.4713, + "step": 17705, + "teacher_loss": 0.4109044075012207 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.6553852558135986, + "learning_rate": 1.4058051536359297e-05, + "loss": 0.2487, + "step": 17706, + "teacher_loss": 0.20351681113243103 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.4203108549118042, + "learning_rate": 1.4055784780072792e-05, + "loss": 0.2351, + "step": 17707, + "teacher_loss": 0.21454772353172302 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 1.3231407403945923, + "learning_rate": 1.405351804543431e-05, + "loss": 0.3969, + "step": 17708, + "teacher_loss": 0.29393795132637024 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.7056772112846375, + "learning_rate": 1.4051251332495819e-05, + "loss": 0.4094, + "step": 17709, + "teacher_loss": 0.3764837384223938 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.6474820375442505, + "learning_rate": 1.4048984641309293e-05, + "loss": 0.2457, + "step": 17710, + "teacher_loss": 0.20108479261398315 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.39592161774635315, + "learning_rate": 1.4046717971926699e-05, + "loss": 0.1769, + "step": 17711, + "teacher_loss": 0.15252313017845154 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.5343132615089417, + "learning_rate": 1.4044451324399999e-05, + "loss": 0.3181, + "step": 17712, + "teacher_loss": 0.29402533173561096 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.20495331287384033, + "learning_rate": 1.4042184698781169e-05, + "loss": 0.2235, + "step": 17713, + "teacher_loss": 0.2255142778158188 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.34046465158462524, + "learning_rate": 1.4039918095122173e-05, + "loss": 0.24, + "step": 17714, + "teacher_loss": 0.22879615426063538 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.1492111086845398, + "learning_rate": 1.4037651513474973e-05, + "loss": 0.2264, + "step": 17715, + "teacher_loss": 0.23500242829322815 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.3030222952365875, + "learning_rate": 1.4035384953891541e-05, + "loss": 0.2291, + "step": 17716, + "teacher_loss": 0.22091852128505707 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.21485331654548645, + "learning_rate": 1.4033118416423844e-05, + "loss": 0.1885, + "step": 17717, + "teacher_loss": 0.1855505108833313 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.2831752300262451, + "learning_rate": 1.4030851901123833e-05, + "loss": 0.1632, + "step": 17718, + "teacher_loss": 0.14986774325370789 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.23836159706115723, + "learning_rate": 1.4028585408043491e-05, + "loss": 0.2117, + "step": 17719, + "teacher_loss": 0.20869216322898865 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.3062734603881836, + "learning_rate": 1.4026318937234772e-05, + "loss": 0.197, + "step": 17720, + "teacher_loss": 0.18488062918186188 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.3281233608722687, + "learning_rate": 1.4024052488749639e-05, + "loss": 0.1921, + "step": 17721, + "teacher_loss": 0.177039235830307 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.059197455644607544, + "learning_rate": 1.4021786062640053e-05, + "loss": 0.1656, + "step": 17722, + "teacher_loss": 0.17742754518985748 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.1570141762495041, + "learning_rate": 1.4019519658957984e-05, + "loss": 0.1593, + "step": 17723, + "teacher_loss": 0.15955600142478943 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.404154896736145, + "learning_rate": 1.401725327775539e-05, + "loss": 0.2089, + "step": 17724, + "teacher_loss": 0.1872521936893463 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.3694135844707489, + "learning_rate": 1.4014986919084228e-05, + "loss": 0.234, + "step": 17725, + "teacher_loss": 0.21898458898067474 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.1793111264705658, + "learning_rate": 1.4012720582996466e-05, + "loss": 0.1449, + "step": 17726, + "teacher_loss": 0.14107060432434082 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.34231704473495483, + "learning_rate": 1.4010454269544064e-05, + "loss": 0.2774, + "step": 17727, + "teacher_loss": 0.2701775133609772 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.35584425926208496, + "learning_rate": 1.400818797877897e-05, + "loss": 0.2467, + "step": 17728, + "teacher_loss": 0.23459625244140625 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.26902374625205994, + "learning_rate": 1.4005921710753159e-05, + "loss": 0.1591, + "step": 17729, + "teacher_loss": 0.14692333340644836 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.5736459493637085, + "learning_rate": 1.4003655465518582e-05, + "loss": 0.1924, + "step": 17730, + "teacher_loss": 0.15006959438323975 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.4083263576030731, + "learning_rate": 1.400138924312719e-05, + "loss": 0.2416, + "step": 17731, + "teacher_loss": 0.22303420305252075 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.44486677646636963, + "learning_rate": 1.399912304363096e-05, + "loss": 0.2068, + "step": 17732, + "teacher_loss": 0.18037262558937073 + }, + { + "compression_loss": 0.0, + "epoch": 3.2, + "label_loss": 0.3760298490524292, + "learning_rate": 1.3996856867081834e-05, + "loss": 0.2479, + "step": 17733, + "teacher_loss": 0.23361876606941223 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.23757338523864746, + "learning_rate": 1.3994590713531768e-05, + "loss": 0.1829, + "step": 17734, + "teacher_loss": 0.17678220570087433 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.2864382863044739, + "learning_rate": 1.3992324583032727e-05, + "loss": 0.362, + "step": 17735, + "teacher_loss": 0.37041598558425903 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.5763384103775024, + "learning_rate": 1.3990058475636663e-05, + "loss": 0.2051, + "step": 17736, + "teacher_loss": 0.16379867494106293 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.4633319079875946, + "learning_rate": 1.3987792391395526e-05, + "loss": 0.2256, + "step": 17737, + "teacher_loss": 0.19915470480918884 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.24617871642112732, + "learning_rate": 1.3985526330361277e-05, + "loss": 0.2065, + "step": 17738, + "teacher_loss": 0.2020697295665741 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.7810106873512268, + "learning_rate": 1.3983260292585869e-05, + "loss": 0.2663, + "step": 17739, + "teacher_loss": 0.20915773510932922 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.7676599025726318, + "learning_rate": 1.3980994278121256e-05, + "loss": 0.4112, + "step": 17740, + "teacher_loss": 0.3715493381023407 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.2399691641330719, + "learning_rate": 1.3978728287019381e-05, + "loss": 0.2957, + "step": 17741, + "teacher_loss": 0.30185675621032715 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.9170626401901245, + "learning_rate": 1.3976462319332214e-05, + "loss": 0.3016, + "step": 17742, + "teacher_loss": 0.23324422538280487 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.8479702472686768, + "learning_rate": 1.3974196375111693e-05, + "loss": 0.435, + "step": 17743, + "teacher_loss": 0.3890647292137146 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.34014949202537537, + "learning_rate": 1.3971930454409772e-05, + "loss": 0.1435, + "step": 17744, + "teacher_loss": 0.12165423482656479 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.4077708423137665, + "learning_rate": 1.3969664557278405e-05, + "loss": 0.2113, + "step": 17745, + "teacher_loss": 0.18946444988250732 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.16778525710105896, + "learning_rate": 1.3967398683769543e-05, + "loss": 0.1694, + "step": 17746, + "teacher_loss": 0.16960649192333221 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.19429773092269897, + "learning_rate": 1.3965132833935126e-05, + "loss": 0.1507, + "step": 17747, + "teacher_loss": 0.145864337682724 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.5520732998847961, + "learning_rate": 1.3962867007827118e-05, + "loss": 0.3056, + "step": 17748, + "teacher_loss": 0.27824074029922485 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.4759901762008667, + "learning_rate": 1.3960601205497456e-05, + "loss": 0.5577, + "step": 17749, + "teacher_loss": 0.566731333732605 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.23135754466056824, + "learning_rate": 1.3958335426998091e-05, + "loss": 0.235, + "step": 17750, + "teacher_loss": 0.23542466759681702 + }, + { + "epoch": 3.21, + "eval_exact_match": 79.90539262062441, + "eval_f1": 87.3797160961695, + "step": 17750 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.82684326171875, + "learning_rate": 1.3956069672380974e-05, + "loss": 0.8044, + "step": 17751, + "teacher_loss": 0.8018832206726074 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.39377492666244507, + "learning_rate": 1.3953803941698054e-05, + "loss": 0.25, + "step": 17752, + "teacher_loss": 0.23407380282878876 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.33119285106658936, + "learning_rate": 1.3951538235001262e-05, + "loss": 0.1821, + "step": 17753, + "teacher_loss": 0.16549348831176758 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.7300830483436584, + "learning_rate": 1.3949272552342564e-05, + "loss": 0.3055, + "step": 17754, + "teacher_loss": 0.2582813501358032 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.7702221870422363, + "learning_rate": 1.3947006893773894e-05, + "loss": 0.2558, + "step": 17755, + "teacher_loss": 0.19865018129348755 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.07892671972513199, + "learning_rate": 1.3944741259347193e-05, + "loss": 0.1785, + "step": 17756, + "teacher_loss": 0.18961171805858612 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.6018067002296448, + "learning_rate": 1.3942475649114421e-05, + "loss": 0.38, + "step": 17757, + "teacher_loss": 0.3554080128669739 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.5098625421524048, + "learning_rate": 1.3940210063127509e-05, + "loss": 0.2096, + "step": 17758, + "teacher_loss": 0.17625929415225983 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.25803834199905396, + "learning_rate": 1.3937944501438398e-05, + "loss": 0.2406, + "step": 17759, + "teacher_loss": 0.23868940770626068 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.21180912852287292, + "learning_rate": 1.3935678964099037e-05, + "loss": 0.2184, + "step": 17760, + "teacher_loss": 0.2191643863916397 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.1667906641960144, + "learning_rate": 1.393341345116137e-05, + "loss": 0.1503, + "step": 17761, + "teacher_loss": 0.1484241783618927 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.9333224892616272, + "learning_rate": 1.3931147962677332e-05, + "loss": 0.4977, + "step": 17762, + "teacher_loss": 0.44930300116539 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.4633907675743103, + "learning_rate": 1.3928882498698864e-05, + "loss": 0.3167, + "step": 17763, + "teacher_loss": 0.3004424571990967 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.6299740076065063, + "learning_rate": 1.3926617059277913e-05, + "loss": 0.529, + "step": 17764, + "teacher_loss": 0.5177338719367981 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.7085591554641724, + "learning_rate": 1.3924351644466418e-05, + "loss": 0.2092, + "step": 17765, + "teacher_loss": 0.1536863148212433 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.44883373379707336, + "learning_rate": 1.3922086254316305e-05, + "loss": 0.2284, + "step": 17766, + "teacher_loss": 0.20394131541252136 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.4773971438407898, + "learning_rate": 1.391982088887953e-05, + "loss": 0.2131, + "step": 17767, + "teacher_loss": 0.18375900387763977 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.27856025099754333, + "learning_rate": 1.3917555548208023e-05, + "loss": 0.2092, + "step": 17768, + "teacher_loss": 0.2014828473329544 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.37856942415237427, + "learning_rate": 1.3915290232353719e-05, + "loss": 0.2301, + "step": 17769, + "teacher_loss": 0.21359002590179443 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.29520073533058167, + "learning_rate": 1.391302494136856e-05, + "loss": 0.2402, + "step": 17770, + "teacher_loss": 0.2340729981660843 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.3753064274787903, + "learning_rate": 1.3910759675304478e-05, + "loss": 0.2574, + "step": 17771, + "teacher_loss": 0.24427062273025513 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.13529081642627716, + "learning_rate": 1.3908494434213411e-05, + "loss": 0.1777, + "step": 17772, + "teacher_loss": 0.1824117749929428 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.8264325857162476, + "learning_rate": 1.3906229218147296e-05, + "loss": 0.3665, + "step": 17773, + "teacher_loss": 0.3153989911079407 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.6421117782592773, + "learning_rate": 1.3903964027158066e-05, + "loss": 0.381, + "step": 17774, + "teacher_loss": 0.3519960641860962 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.37415218353271484, + "learning_rate": 1.390169886129765e-05, + "loss": 0.1854, + "step": 17775, + "teacher_loss": 0.16437961161136627 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.7798163890838623, + "learning_rate": 1.389943372061799e-05, + "loss": 0.3087, + "step": 17776, + "teacher_loss": 0.25636786222457886 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.6778844594955444, + "learning_rate": 1.389716860517102e-05, + "loss": 0.4921, + "step": 17777, + "teacher_loss": 0.47141796350479126 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.7336118221282959, + "learning_rate": 1.3894903515008657e-05, + "loss": 0.2345, + "step": 17778, + "teacher_loss": 0.17899875342845917 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.5994202494621277, + "learning_rate": 1.3892638450182852e-05, + "loss": 0.204, + "step": 17779, + "teacher_loss": 0.16002686321735382 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.20934957265853882, + "learning_rate": 1.3890373410745525e-05, + "loss": 0.1766, + "step": 17780, + "teacher_loss": 0.17296954989433289 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.25799915194511414, + "learning_rate": 1.388810839674861e-05, + "loss": 0.148, + "step": 17781, + "teacher_loss": 0.13580778241157532 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.7298446893692017, + "learning_rate": 1.3885843408244032e-05, + "loss": 0.3167, + "step": 17782, + "teacher_loss": 0.27077868580818176 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.1925753951072693, + "learning_rate": 1.3883578445283726e-05, + "loss": 0.1501, + "step": 17783, + "teacher_loss": 0.14532917737960815 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.13934828341007233, + "learning_rate": 1.3881313507919618e-05, + "loss": 0.1928, + "step": 17784, + "teacher_loss": 0.1987382173538208 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.5044597387313843, + "learning_rate": 1.3879048596203637e-05, + "loss": 0.246, + "step": 17785, + "teacher_loss": 0.21726396679878235 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.33396244049072266, + "learning_rate": 1.3876783710187711e-05, + "loss": 0.2013, + "step": 17786, + "teacher_loss": 0.18651892244815826 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.26629796624183655, + "learning_rate": 1.3874518849923769e-05, + "loss": 0.1879, + "step": 17787, + "teacher_loss": 0.17923521995544434 + }, + { + "compression_loss": 0.0, + "epoch": 3.21, + "label_loss": 0.9082004427909851, + "learning_rate": 1.3872254015463733e-05, + "loss": 0.2493, + "step": 17788, + "teacher_loss": 0.1761389523744583 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.3509395718574524, + "learning_rate": 1.3869989206859533e-05, + "loss": 0.2243, + "step": 17789, + "teacher_loss": 0.21019186079502106 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.5572032928466797, + "learning_rate": 1.3867724424163097e-05, + "loss": 0.2458, + "step": 17790, + "teacher_loss": 0.2111891657114029 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.3161216676235199, + "learning_rate": 1.3865459667426335e-05, + "loss": 0.2275, + "step": 17791, + "teacher_loss": 0.2176235467195511 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.36654701828956604, + "learning_rate": 1.386319493670119e-05, + "loss": 0.2477, + "step": 17792, + "teacher_loss": 0.23445579409599304 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.37016183137893677, + "learning_rate": 1.3860930232039575e-05, + "loss": 0.1863, + "step": 17793, + "teacher_loss": 0.16586801409721375 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.2387593686580658, + "learning_rate": 1.3858665553493411e-05, + "loss": 0.2156, + "step": 17794, + "teacher_loss": 0.21303579211235046 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 1.050684928894043, + "learning_rate": 1.3856400901114627e-05, + "loss": 0.4766, + "step": 17795, + "teacher_loss": 0.4128328561782837 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.2775558531284332, + "learning_rate": 1.385413627495514e-05, + "loss": 0.1774, + "step": 17796, + "teacher_loss": 0.1662633717060089 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.3065398931503296, + "learning_rate": 1.3851871675066873e-05, + "loss": 0.3035, + "step": 17797, + "teacher_loss": 0.30317962169647217 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.18469856679439545, + "learning_rate": 1.3849607101501748e-05, + "loss": 0.1597, + "step": 17798, + "teacher_loss": 0.1569458693265915 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.6691349744796753, + "learning_rate": 1.3847342554311681e-05, + "loss": 0.2884, + "step": 17799, + "teacher_loss": 0.2460532784461975 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.17319554090499878, + "learning_rate": 1.3845078033548592e-05, + "loss": 0.1951, + "step": 17800, + "teacher_loss": 0.19758352637290955 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.9470968246459961, + "learning_rate": 1.3842813539264405e-05, + "loss": 0.3105, + "step": 17801, + "teacher_loss": 0.23980209231376648 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.6728264093399048, + "learning_rate": 1.3840549071511036e-05, + "loss": 0.2987, + "step": 17802, + "teacher_loss": 0.2570805251598358 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.6264507174491882, + "learning_rate": 1.3838284630340398e-05, + "loss": 0.2158, + "step": 17803, + "teacher_loss": 0.17012710869312286 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.5300797820091248, + "learning_rate": 1.3836020215804407e-05, + "loss": 0.3452, + "step": 17804, + "teacher_loss": 0.3246801495552063 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.496523916721344, + "learning_rate": 1.3833755827954985e-05, + "loss": 0.2112, + "step": 17805, + "teacher_loss": 0.17947755753993988 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.40019291639328003, + "learning_rate": 1.3831491466844047e-05, + "loss": 0.2001, + "step": 17806, + "teacher_loss": 0.17790161073207855 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.2955458462238312, + "learning_rate": 1.3829227132523502e-05, + "loss": 0.2167, + "step": 17807, + "teacher_loss": 0.20796501636505127 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.4099975824356079, + "learning_rate": 1.3826962825045272e-05, + "loss": 0.2128, + "step": 17808, + "teacher_loss": 0.19083893299102783 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.3059951663017273, + "learning_rate": 1.3824698544461268e-05, + "loss": 0.1935, + "step": 17809, + "teacher_loss": 0.18097171187400818 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.4173629879951477, + "learning_rate": 1.3822434290823399e-05, + "loss": 0.2493, + "step": 17810, + "teacher_loss": 0.23062361776828766 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.2537233829498291, + "learning_rate": 1.3820170064183584e-05, + "loss": 0.1709, + "step": 17811, + "teacher_loss": 0.16165202856063843 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.22788269817829132, + "learning_rate": 1.3817905864593733e-05, + "loss": 0.1496, + "step": 17812, + "teacher_loss": 0.14094901084899902 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.4514191746711731, + "learning_rate": 1.3815641692105753e-05, + "loss": 0.2601, + "step": 17813, + "teacher_loss": 0.23888617753982544 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.38667869567871094, + "learning_rate": 1.3813377546771561e-05, + "loss": 0.288, + "step": 17814, + "teacher_loss": 0.2770565152168274 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.4020824432373047, + "learning_rate": 1.3811113428643067e-05, + "loss": 0.3178, + "step": 17815, + "teacher_loss": 0.3084867000579834 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.25023317337036133, + "learning_rate": 1.380884933777217e-05, + "loss": 0.2005, + "step": 17816, + "teacher_loss": 0.19499355554580688 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.37997233867645264, + "learning_rate": 1.3806585274210794e-05, + "loss": 0.2496, + "step": 17817, + "teacher_loss": 0.23506735265254974 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.7976357936859131, + "learning_rate": 1.3804321238010837e-05, + "loss": 0.3665, + "step": 17818, + "teacher_loss": 0.3185747265815735 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.7212059497833252, + "learning_rate": 1.3802057229224206e-05, + "loss": 0.3755, + "step": 17819, + "teacher_loss": 0.3371182978153229 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.4705544114112854, + "learning_rate": 1.3799793247902814e-05, + "loss": 0.1691, + "step": 17820, + "teacher_loss": 0.13560077548027039 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.38171130418777466, + "learning_rate": 1.3797529294098564e-05, + "loss": 0.2134, + "step": 17821, + "teacher_loss": 0.19466093182563782 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.31826701760292053, + "learning_rate": 1.379526536786336e-05, + "loss": 0.2296, + "step": 17822, + "teacher_loss": 0.2197890281677246 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.3092745542526245, + "learning_rate": 1.3793001469249112e-05, + "loss": 0.1459, + "step": 17823, + "teacher_loss": 0.12775088846683502 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.3573278486728668, + "learning_rate": 1.3790737598307722e-05, + "loss": 0.1503, + "step": 17824, + "teacher_loss": 0.1273033618927002 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 1.042050838470459, + "learning_rate": 1.3788473755091097e-05, + "loss": 0.3295, + "step": 17825, + "teacher_loss": 0.2503780722618103 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.7646037340164185, + "learning_rate": 1.3786209939651124e-05, + "loss": 0.3309, + "step": 17826, + "teacher_loss": 0.2826780676841736 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.6198161840438843, + "learning_rate": 1.378394615203973e-05, + "loss": 0.2434, + "step": 17827, + "teacher_loss": 0.20158883929252625 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.8396542072296143, + "learning_rate": 1.3781682392308801e-05, + "loss": 0.3676, + "step": 17828, + "teacher_loss": 0.31516358256340027 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.4323810338973999, + "learning_rate": 1.3779418660510237e-05, + "loss": 0.2077, + "step": 17829, + "teacher_loss": 0.18271008133888245 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.7173413038253784, + "learning_rate": 1.377715495669595e-05, + "loss": 0.2327, + "step": 17830, + "teacher_loss": 0.17882099747657776 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.4369521737098694, + "learning_rate": 1.3774891280917831e-05, + "loss": 0.2385, + "step": 17831, + "teacher_loss": 0.21650457382202148 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.5975555181503296, + "learning_rate": 1.377262763322778e-05, + "loss": 0.2396, + "step": 17832, + "teacher_loss": 0.19982370734214783 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.4877334237098694, + "learning_rate": 1.3770364013677701e-05, + "loss": 0.2167, + "step": 17833, + "teacher_loss": 0.18654392659664154 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.496481329202652, + "learning_rate": 1.376810042231949e-05, + "loss": 0.1752, + "step": 17834, + "teacher_loss": 0.13955262303352356 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.4529290497303009, + "learning_rate": 1.3765836859205036e-05, + "loss": 0.2489, + "step": 17835, + "teacher_loss": 0.22623030841350555 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.258700966835022, + "learning_rate": 1.3763573324386247e-05, + "loss": 0.2224, + "step": 17836, + "teacher_loss": 0.21837686002254486 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.37753772735595703, + "learning_rate": 1.3761309817915017e-05, + "loss": 0.204, + "step": 17837, + "teacher_loss": 0.184768408536911 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 1.0870299339294434, + "learning_rate": 1.3759046339843233e-05, + "loss": 0.2382, + "step": 17838, + "teacher_loss": 0.14393766224384308 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.5531726479530334, + "learning_rate": 1.37567828902228e-05, + "loss": 0.2982, + "step": 17839, + "teacher_loss": 0.26991429924964905 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.38269758224487305, + "learning_rate": 1.3754519469105612e-05, + "loss": 0.2805, + "step": 17840, + "teacher_loss": 0.2691340744495392 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.2569577097892761, + "learning_rate": 1.3752256076543549e-05, + "loss": 0.288, + "step": 17841, + "teacher_loss": 0.29143622517585754 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.2525979280471802, + "learning_rate": 1.374999271258852e-05, + "loss": 0.2363, + "step": 17842, + "teacher_loss": 0.23447683453559875 + }, + { + "compression_loss": 0.0, + "epoch": 3.22, + "label_loss": 0.5004053115844727, + "learning_rate": 1.374772937729241e-05, + "loss": 0.2449, + "step": 17843, + "teacher_loss": 0.21653254330158234 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.21672698855400085, + "learning_rate": 1.374546607070711e-05, + "loss": 0.1447, + "step": 17844, + "teacher_loss": 0.13671648502349854 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.38864701986312866, + "learning_rate": 1.3743202792884509e-05, + "loss": 0.3385, + "step": 17845, + "teacher_loss": 0.33290863037109375 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.377738893032074, + "learning_rate": 1.3740939543876504e-05, + "loss": 0.2155, + "step": 17846, + "teacher_loss": 0.19750091433525085 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.5593780279159546, + "learning_rate": 1.3738676323734978e-05, + "loss": 0.2921, + "step": 17847, + "teacher_loss": 0.2623681426048279 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.280231773853302, + "learning_rate": 1.373641313251182e-05, + "loss": 0.1934, + "step": 17848, + "teacher_loss": 0.18378078937530518 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.3979759216308594, + "learning_rate": 1.3734149970258925e-05, + "loss": 0.2608, + "step": 17849, + "teacher_loss": 0.2455916404724121 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.361625611782074, + "learning_rate": 1.3731886837028177e-05, + "loss": 0.2266, + "step": 17850, + "teacher_loss": 0.2116299420595169 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.24727371335029602, + "learning_rate": 1.3729623732871452e-05, + "loss": 0.2299, + "step": 17851, + "teacher_loss": 0.22796601057052612 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.4785187542438507, + "learning_rate": 1.3727360657840658e-05, + "loss": 0.2486, + "step": 17852, + "teacher_loss": 0.22303897142410278 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.42806005477905273, + "learning_rate": 1.3725097611987664e-05, + "loss": 0.2152, + "step": 17853, + "teacher_loss": 0.1915723830461502 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.7730212211608887, + "learning_rate": 1.3722834595364356e-05, + "loss": 0.3504, + "step": 17854, + "teacher_loss": 0.3034707307815552 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.2793616056442261, + "learning_rate": 1.3720571608022626e-05, + "loss": 0.1912, + "step": 17855, + "teacher_loss": 0.1813506782054901 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.35565051436424255, + "learning_rate": 1.3718308650014352e-05, + "loss": 0.2278, + "step": 17856, + "teacher_loss": 0.21363089978694916 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.22848287224769592, + "learning_rate": 1.3716045721391415e-05, + "loss": 0.2843, + "step": 17857, + "teacher_loss": 0.29054874181747437 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.6709418296813965, + "learning_rate": 1.3713782822205703e-05, + "loss": 0.2619, + "step": 17858, + "teacher_loss": 0.21644198894500732 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.2802004814147949, + "learning_rate": 1.3711519952509096e-05, + "loss": 0.3008, + "step": 17859, + "teacher_loss": 0.3030715584754944 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.9301304221153259, + "learning_rate": 1.3709257112353469e-05, + "loss": 0.3686, + "step": 17860, + "teacher_loss": 0.3061832785606384 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.27422329783439636, + "learning_rate": 1.3706994301790708e-05, + "loss": 0.1946, + "step": 17861, + "teacher_loss": 0.18570773303508759 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.29091331362724304, + "learning_rate": 1.3704731520872693e-05, + "loss": 0.2294, + "step": 17862, + "teacher_loss": 0.2225547432899475 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.6820839643478394, + "learning_rate": 1.3702468769651297e-05, + "loss": 0.2574, + "step": 17863, + "teacher_loss": 0.21026533842086792 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.39962050318717957, + "learning_rate": 1.3700206048178407e-05, + "loss": 0.219, + "step": 17864, + "teacher_loss": 0.19894824922084808 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.2082759141921997, + "learning_rate": 1.3697943356505897e-05, + "loss": 0.1868, + "step": 17865, + "teacher_loss": 0.18443885445594788 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.4345414638519287, + "learning_rate": 1.369568069468564e-05, + "loss": 0.175, + "step": 17866, + "teacher_loss": 0.14616265892982483 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.2150755226612091, + "learning_rate": 1.3693418062769508e-05, + "loss": 0.1746, + "step": 17867, + "teacher_loss": 0.17014577984809875 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.22658689320087433, + "learning_rate": 1.3691155460809388e-05, + "loss": 0.1702, + "step": 17868, + "teacher_loss": 0.16391071677207947 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.8484402298927307, + "learning_rate": 1.3688892888857149e-05, + "loss": 0.3013, + "step": 17869, + "teacher_loss": 0.24049615859985352 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.690902829170227, + "learning_rate": 1.3686630346964662e-05, + "loss": 0.308, + "step": 17870, + "teacher_loss": 0.26549142599105835 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.30712515115737915, + "learning_rate": 1.3684367835183807e-05, + "loss": 0.1609, + "step": 17871, + "teacher_loss": 0.14463184773921967 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.3385729193687439, + "learning_rate": 1.3682105353566453e-05, + "loss": 0.2131, + "step": 17872, + "teacher_loss": 0.1991869956254959 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.6309759616851807, + "learning_rate": 1.3679842902164467e-05, + "loss": 0.2309, + "step": 17873, + "teacher_loss": 0.1864413321018219 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.2998085021972656, + "learning_rate": 1.367758048102973e-05, + "loss": 0.1517, + "step": 17874, + "teacher_loss": 0.13527345657348633 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.5139168500900269, + "learning_rate": 1.3675318090214112e-05, + "loss": 0.2419, + "step": 17875, + "teacher_loss": 0.2117028683423996 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.9159693717956543, + "learning_rate": 1.3673055729769467e-05, + "loss": 0.3943, + "step": 17876, + "teacher_loss": 0.3363143503665924 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.7316271066665649, + "learning_rate": 1.3670793399747686e-05, + "loss": 0.2156, + "step": 17877, + "teacher_loss": 0.15831011533737183 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.23709836602210999, + "learning_rate": 1.3668531100200624e-05, + "loss": 0.1891, + "step": 17878, + "teacher_loss": 0.18372409045696259 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.45341965556144714, + "learning_rate": 1.3666268831180144e-05, + "loss": 0.2838, + "step": 17879, + "teacher_loss": 0.2649998664855957 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.29396146535873413, + "learning_rate": 1.3664006592738133e-05, + "loss": 0.208, + "step": 17880, + "teacher_loss": 0.19839657843112946 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.5531530380249023, + "learning_rate": 1.3661744384926442e-05, + "loss": 0.2741, + "step": 17881, + "teacher_loss": 0.2431270182132721 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.29309001564979553, + "learning_rate": 1.3659482207796936e-05, + "loss": 0.2432, + "step": 17882, + "teacher_loss": 0.23762616515159607 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.2827276289463043, + "learning_rate": 1.3657220061401486e-05, + "loss": 0.2173, + "step": 17883, + "teacher_loss": 0.21004685759544373 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.41173237562179565, + "learning_rate": 1.3654957945791955e-05, + "loss": 0.2126, + "step": 17884, + "teacher_loss": 0.19047416746616364 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.290057897567749, + "learning_rate": 1.3652695861020206e-05, + "loss": 0.2198, + "step": 17885, + "teacher_loss": 0.21198277175426483 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.36265161633491516, + "learning_rate": 1.36504338071381e-05, + "loss": 0.2294, + "step": 17886, + "teacher_loss": 0.21459153294563293 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.3917011618614197, + "learning_rate": 1.36481717841975e-05, + "loss": 0.2348, + "step": 17887, + "teacher_loss": 0.21741682291030884 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.5426660776138306, + "learning_rate": 1.3645909792250274e-05, + "loss": 0.3107, + "step": 17888, + "teacher_loss": 0.2848764955997467 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.3115384578704834, + "learning_rate": 1.3643647831348267e-05, + "loss": 0.2121, + "step": 17889, + "teacher_loss": 0.20105823874473572 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.3003392219543457, + "learning_rate": 1.3641385901543356e-05, + "loss": 0.2289, + "step": 17890, + "teacher_loss": 0.22092482447624207 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.9843560457229614, + "learning_rate": 1.3639124002887392e-05, + "loss": 0.3292, + "step": 17891, + "teacher_loss": 0.25640881061553955 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.4312601089477539, + "learning_rate": 1.363686213543223e-05, + "loss": 0.2768, + "step": 17892, + "teacher_loss": 0.2596362233161926 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.18807637691497803, + "learning_rate": 1.3634600299229735e-05, + "loss": 0.2414, + "step": 17893, + "teacher_loss": 0.2472846657037735 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.11939650028944016, + "learning_rate": 1.3632338494331764e-05, + "loss": 0.1595, + "step": 17894, + "teacher_loss": 0.16391196846961975 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.8226872682571411, + "learning_rate": 1.3630076720790165e-05, + "loss": 0.3156, + "step": 17895, + "teacher_loss": 0.259303480386734 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 1.1016690731048584, + "learning_rate": 1.3627814978656804e-05, + "loss": 0.254, + "step": 17896, + "teacher_loss": 0.15981265902519226 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.19216234982013702, + "learning_rate": 1.362555326798353e-05, + "loss": 0.1663, + "step": 17897, + "teacher_loss": 0.16339480876922607 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.26410144567489624, + "learning_rate": 1.3623291588822198e-05, + "loss": 0.1442, + "step": 17898, + "teacher_loss": 0.1308397501707077 + }, + { + "compression_loss": 0.0, + "epoch": 3.23, + "label_loss": 0.2545037269592285, + "learning_rate": 1.3621029941224666e-05, + "loss": 0.1808, + "step": 17899, + "teacher_loss": 0.17255869507789612 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.22529344260692596, + "learning_rate": 1.3618768325242784e-05, + "loss": 0.1762, + "step": 17900, + "teacher_loss": 0.17073184251785278 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.5310543775558472, + "learning_rate": 1.3616506740928393e-05, + "loss": 0.3608, + "step": 17901, + "teacher_loss": 0.34183478355407715 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.20028069615364075, + "learning_rate": 1.3614245188333367e-05, + "loss": 0.2475, + "step": 17902, + "teacher_loss": 0.25274956226348877 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.6437517404556274, + "learning_rate": 1.3611983667509538e-05, + "loss": 0.3608, + "step": 17903, + "teacher_loss": 0.32940834760665894 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.3183338940143585, + "learning_rate": 1.3609722178508758e-05, + "loss": 0.2328, + "step": 17904, + "teacher_loss": 0.2233031541109085 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.2668170928955078, + "learning_rate": 1.3607460721382888e-05, + "loss": 0.184, + "step": 17905, + "teacher_loss": 0.17485041916370392 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.6202265024185181, + "learning_rate": 1.3605199296183766e-05, + "loss": 0.2493, + "step": 17906, + "teacher_loss": 0.20804864168167114 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.30104145407676697, + "learning_rate": 1.3602937902963242e-05, + "loss": 0.2255, + "step": 17907, + "teacher_loss": 0.21705962717533112 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.23740994930267334, + "learning_rate": 1.3600676541773162e-05, + "loss": 0.1984, + "step": 17908, + "teacher_loss": 0.19404630362987518 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.6643080115318298, + "learning_rate": 1.3598415212665375e-05, + "loss": 0.4363, + "step": 17909, + "teacher_loss": 0.41095784306526184 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.8403022289276123, + "learning_rate": 1.3596153915691724e-05, + "loss": 0.2469, + "step": 17910, + "teacher_loss": 0.18102183938026428 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.1629820466041565, + "learning_rate": 1.3593892650904052e-05, + "loss": 0.1935, + "step": 17911, + "teacher_loss": 0.1969369351863861 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.4275708496570587, + "learning_rate": 1.3591631418354211e-05, + "loss": 0.2382, + "step": 17912, + "teacher_loss": 0.21713140606880188 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.7296366095542908, + "learning_rate": 1.358937021809404e-05, + "loss": 0.299, + "step": 17913, + "teacher_loss": 0.2511311173439026 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.46660614013671875, + "learning_rate": 1.3587109050175373e-05, + "loss": 0.2337, + "step": 17914, + "teacher_loss": 0.20785781741142273 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.19109860062599182, + "learning_rate": 1.3584847914650067e-05, + "loss": 0.2088, + "step": 17915, + "teacher_loss": 0.2108122706413269 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.29618990421295166, + "learning_rate": 1.3582586811569954e-05, + "loss": 0.2189, + "step": 17916, + "teacher_loss": 0.2102924883365631 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.5907838344573975, + "learning_rate": 1.3580325740986873e-05, + "loss": 0.2859, + "step": 17917, + "teacher_loss": 0.25204408168792725 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.22742050886154175, + "learning_rate": 1.3578064702952668e-05, + "loss": 0.222, + "step": 17918, + "teacher_loss": 0.22135965526103973 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.24711500108242035, + "learning_rate": 1.3575803697519177e-05, + "loss": 0.1955, + "step": 17919, + "teacher_loss": 0.18978387117385864 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.5944147109985352, + "learning_rate": 1.3573542724738233e-05, + "loss": 0.5108, + "step": 17920, + "teacher_loss": 0.5015382170677185 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.41176509857177734, + "learning_rate": 1.3571281784661683e-05, + "loss": 0.2944, + "step": 17921, + "teacher_loss": 0.2813221514225006 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.3666585385799408, + "learning_rate": 1.3569020877341356e-05, + "loss": 0.2317, + "step": 17922, + "teacher_loss": 0.21672730147838593 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.49456626176834106, + "learning_rate": 1.3566760002829088e-05, + "loss": 0.3047, + "step": 17923, + "teacher_loss": 0.28362077474594116 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.33030563592910767, + "learning_rate": 1.3564499161176718e-05, + "loss": 0.2266, + "step": 17924, + "teacher_loss": 0.21506813168525696 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.3109801709651947, + "learning_rate": 1.356223835243608e-05, + "loss": 0.2203, + "step": 17925, + "teacher_loss": 0.21024981141090393 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.9230259656906128, + "learning_rate": 1.3559977576659e-05, + "loss": 0.5323, + "step": 17926, + "teacher_loss": 0.48883843421936035 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.3097284436225891, + "learning_rate": 1.3557716833897326e-05, + "loss": 0.291, + "step": 17927, + "teacher_loss": 0.288865327835083 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.2568824887275696, + "learning_rate": 1.3555456124202876e-05, + "loss": 0.2158, + "step": 17928, + "teacher_loss": 0.2112794816493988 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.43142515420913696, + "learning_rate": 1.3553195447627486e-05, + "loss": 0.2053, + "step": 17929, + "teacher_loss": 0.1802024096250534 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.4834776818752289, + "learning_rate": 1.3550934804222983e-05, + "loss": 0.1857, + "step": 17930, + "teacher_loss": 0.15262514352798462 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 1.0416516065597534, + "learning_rate": 1.3548674194041205e-05, + "loss": 0.3945, + "step": 17931, + "teacher_loss": 0.3226143717765808 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.47691306471824646, + "learning_rate": 1.3546413617133977e-05, + "loss": 0.2369, + "step": 17932, + "teacher_loss": 0.21026135981082916 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.3165375590324402, + "learning_rate": 1.3544153073553122e-05, + "loss": 0.2342, + "step": 17933, + "teacher_loss": 0.2250521183013916 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.33222872018814087, + "learning_rate": 1.3541892563350476e-05, + "loss": 0.2721, + "step": 17934, + "teacher_loss": 0.26539868116378784 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.3754420280456543, + "learning_rate": 1.3539632086577862e-05, + "loss": 0.2705, + "step": 17935, + "teacher_loss": 0.258821576833725 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.41611868143081665, + "learning_rate": 1.3537371643287103e-05, + "loss": 0.334, + "step": 17936, + "teacher_loss": 0.32491499185562134 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.5634071826934814, + "learning_rate": 1.3535111233530028e-05, + "loss": 0.2851, + "step": 17937, + "teacher_loss": 0.25421175360679626 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.7406641840934753, + "learning_rate": 1.3532850857358467e-05, + "loss": 0.212, + "step": 17938, + "teacher_loss": 0.15324822068214417 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.32580795884132385, + "learning_rate": 1.3530590514824226e-05, + "loss": 0.1646, + "step": 17939, + "teacher_loss": 0.1467362642288208 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.2023438811302185, + "learning_rate": 1.3528330205979149e-05, + "loss": 0.1709, + "step": 17940, + "teacher_loss": 0.1673606038093567 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.5118176341056824, + "learning_rate": 1.3526069930875044e-05, + "loss": 0.3031, + "step": 17941, + "teacher_loss": 0.27990370988845825 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.8362551927566528, + "learning_rate": 1.3523809689563733e-05, + "loss": 0.391, + "step": 17942, + "teacher_loss": 0.34148338437080383 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.35762929916381836, + "learning_rate": 1.3521549482097045e-05, + "loss": 0.221, + "step": 17943, + "teacher_loss": 0.20583957433700562 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.28415733575820923, + "learning_rate": 1.3519289308526792e-05, + "loss": 0.1669, + "step": 17944, + "teacher_loss": 0.153926283121109 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.7693701982498169, + "learning_rate": 1.3517029168904796e-05, + "loss": 0.321, + "step": 17945, + "teacher_loss": 0.27118054032325745 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.5445525050163269, + "learning_rate": 1.3514769063282875e-05, + "loss": 0.3355, + "step": 17946, + "teacher_loss": 0.3122883439064026 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.3659040629863739, + "learning_rate": 1.3512508991712848e-05, + "loss": 0.1529, + "step": 17947, + "teacher_loss": 0.12917840480804443 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.47576749324798584, + "learning_rate": 1.3510248954246532e-05, + "loss": 0.1964, + "step": 17948, + "teacher_loss": 0.1653842329978943 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 1.0991228818893433, + "learning_rate": 1.3507988950935733e-05, + "loss": 0.4157, + "step": 17949, + "teacher_loss": 0.33978402614593506 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.7395064830780029, + "learning_rate": 1.3505728981832285e-05, + "loss": 0.3408, + "step": 17950, + "teacher_loss": 0.29649513959884644 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.24430668354034424, + "learning_rate": 1.3503469046987983e-05, + "loss": 0.2416, + "step": 17951, + "teacher_loss": 0.24131101369857788 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.9090707302093506, + "learning_rate": 1.3501209146454651e-05, + "loss": 0.4204, + "step": 17952, + "teacher_loss": 0.366134375333786 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.4414246678352356, + "learning_rate": 1.34989492802841e-05, + "loss": 0.2309, + "step": 17953, + "teacher_loss": 0.20747947692871094 + }, + { + "compression_loss": 0.0, + "epoch": 3.24, + "label_loss": 0.35907846689224243, + "learning_rate": 1.3496689448528143e-05, + "loss": 0.2822, + "step": 17954, + "teacher_loss": 0.2736409306526184 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.19814419746398926, + "learning_rate": 1.3494429651238585e-05, + "loss": 0.1854, + "step": 17955, + "teacher_loss": 0.1839936226606369 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.2181391417980194, + "learning_rate": 1.3492169888467246e-05, + "loss": 0.1725, + "step": 17956, + "teacher_loss": 0.1674099564552307 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.46645861864089966, + "learning_rate": 1.348991016026593e-05, + "loss": 0.2356, + "step": 17957, + "teacher_loss": 0.2099272459745407 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.4819023609161377, + "learning_rate": 1.3487650466686442e-05, + "loss": 0.2209, + "step": 17958, + "teacher_loss": 0.1919083148241043 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.5197563171386719, + "learning_rate": 1.3485390807780601e-05, + "loss": 0.2245, + "step": 17959, + "teacher_loss": 0.19174908101558685 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.440390408039093, + "learning_rate": 1.3483131183600205e-05, + "loss": 0.1675, + "step": 17960, + "teacher_loss": 0.1371985524892807 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.33385708928108215, + "learning_rate": 1.348087159419706e-05, + "loss": 0.3438, + "step": 17961, + "teacher_loss": 0.3448812961578369 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.46897149085998535, + "learning_rate": 1.347861203962298e-05, + "loss": 0.2089, + "step": 17962, + "teacher_loss": 0.18005365133285522 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.3958002030849457, + "learning_rate": 1.3476352519929766e-05, + "loss": 0.2512, + "step": 17963, + "teacher_loss": 0.23509825766086578 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.4387205243110657, + "learning_rate": 1.3474093035169212e-05, + "loss": 0.3714, + "step": 17964, + "teacher_loss": 0.3639586269855499 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.18875867128372192, + "learning_rate": 1.3471833585393139e-05, + "loss": 0.2247, + "step": 17965, + "teacher_loss": 0.228724867105484 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.4327235817909241, + "learning_rate": 1.3469574170653337e-05, + "loss": 0.2545, + "step": 17966, + "teacher_loss": 0.23472946882247925 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.17957806587219238, + "learning_rate": 1.3467314791001608e-05, + "loss": 0.1873, + "step": 17967, + "teacher_loss": 0.18815457820892334 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.8084374666213989, + "learning_rate": 1.3465055446489757e-05, + "loss": 0.5776, + "step": 17968, + "teacher_loss": 0.5519437193870544 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.25457704067230225, + "learning_rate": 1.3462796137169584e-05, + "loss": 0.1875, + "step": 17969, + "teacher_loss": 0.1800491213798523 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 1.3587530851364136, + "learning_rate": 1.3460536863092888e-05, + "loss": 0.455, + "step": 17970, + "teacher_loss": 0.354530394077301 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.8940027952194214, + "learning_rate": 1.3458277624311461e-05, + "loss": 0.3136, + "step": 17971, + "teacher_loss": 0.24912425875663757 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.841683030128479, + "learning_rate": 1.3456018420877111e-05, + "loss": 0.3592, + "step": 17972, + "teacher_loss": 0.3055430054664612 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.24170561134815216, + "learning_rate": 1.345375925284163e-05, + "loss": 0.1943, + "step": 17973, + "teacher_loss": 0.18905138969421387 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.24589915573596954, + "learning_rate": 1.3451500120256806e-05, + "loss": 0.2806, + "step": 17974, + "teacher_loss": 0.284446120262146 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.5890187621116638, + "learning_rate": 1.344924102317445e-05, + "loss": 0.2192, + "step": 17975, + "teacher_loss": 0.17814594507217407 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.43649035692214966, + "learning_rate": 1.3446981961646346e-05, + "loss": 0.2342, + "step": 17976, + "teacher_loss": 0.21174272894859314 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.42006468772888184, + "learning_rate": 1.3444722935724287e-05, + "loss": 0.2096, + "step": 17977, + "teacher_loss": 0.18619731068611145 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.8843041062355042, + "learning_rate": 1.3442463945460069e-05, + "loss": 0.3347, + "step": 17978, + "teacher_loss": 0.2736354470252991 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.5869308114051819, + "learning_rate": 1.3440204990905483e-05, + "loss": 0.3299, + "step": 17979, + "teacher_loss": 0.3013291656970978 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.6212499737739563, + "learning_rate": 1.3437946072112317e-05, + "loss": 0.2545, + "step": 17980, + "teacher_loss": 0.21378956735134125 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.8992911577224731, + "learning_rate": 1.343568718913237e-05, + "loss": 0.3675, + "step": 17981, + "teacher_loss": 0.30844783782958984 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.3091692328453064, + "learning_rate": 1.3433428342017422e-05, + "loss": 0.2503, + "step": 17982, + "teacher_loss": 0.24377751350402832 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.5269281268119812, + "learning_rate": 1.3431169530819264e-05, + "loss": 0.3066, + "step": 17983, + "teacher_loss": 0.2821333408355713 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.5792193412780762, + "learning_rate": 1.3428910755589688e-05, + "loss": 0.2335, + "step": 17984, + "teacher_loss": 0.19503188133239746 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.4550713896751404, + "learning_rate": 1.3426652016380479e-05, + "loss": 0.2089, + "step": 17985, + "teacher_loss": 0.18155571818351746 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.5396783351898193, + "learning_rate": 1.3424393313243418e-05, + "loss": 0.2623, + "step": 17986, + "teacher_loss": 0.23149323463439941 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.42046624422073364, + "learning_rate": 1.3422134646230298e-05, + "loss": 0.202, + "step": 17987, + "teacher_loss": 0.17768613994121552 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.5487052202224731, + "learning_rate": 1.3419876015392904e-05, + "loss": 0.4206, + "step": 17988, + "teacher_loss": 0.4063110053539276 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.26007553935050964, + "learning_rate": 1.341761742078301e-05, + "loss": 0.1729, + "step": 17989, + "teacher_loss": 0.16318368911743164 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.3510127067565918, + "learning_rate": 1.3415358862452403e-05, + "loss": 0.1929, + "step": 17990, + "teacher_loss": 0.17536193132400513 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.20279119908809662, + "learning_rate": 1.3413100340452869e-05, + "loss": 0.1953, + "step": 17991, + "teacher_loss": 0.1944923996925354 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.5365443825721741, + "learning_rate": 1.3410841854836185e-05, + "loss": 0.208, + "step": 17992, + "teacher_loss": 0.1714564710855484 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.7745158076286316, + "learning_rate": 1.340858340565413e-05, + "loss": 0.2982, + "step": 17993, + "teacher_loss": 0.24531300365924835 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.4842342734336853, + "learning_rate": 1.340632499295849e-05, + "loss": 0.2627, + "step": 17994, + "teacher_loss": 0.23804882168769836 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.33197221159935, + "learning_rate": 1.340406661680104e-05, + "loss": 0.2458, + "step": 17995, + "teacher_loss": 0.23621074855327606 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.9307555556297302, + "learning_rate": 1.3401808277233555e-05, + "loss": 0.2615, + "step": 17996, + "teacher_loss": 0.18708816170692444 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.6056849956512451, + "learning_rate": 1.3399549974307816e-05, + "loss": 0.2572, + "step": 17997, + "teacher_loss": 0.21846500039100647 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.3170185089111328, + "learning_rate": 1.3397291708075602e-05, + "loss": 0.3039, + "step": 17998, + "teacher_loss": 0.30240270495414734 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.4331493377685547, + "learning_rate": 1.3395033478588675e-05, + "loss": 0.3011, + "step": 17999, + "teacher_loss": 0.28642538189888 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.355377197265625, + "learning_rate": 1.3392775285898827e-05, + "loss": 0.1787, + "step": 18000, + "teacher_loss": 0.15907607972621918 + }, + { + "epoch": 3.25, + "eval_exact_match": 80.0, + "eval_f1": 87.35321118500602, + "step": 18000 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.21056440472602844, + "learning_rate": 1.339051713005782e-05, + "loss": 0.2549, + "step": 18001, + "teacher_loss": 0.25985103845596313 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.19201895594596863, + "learning_rate": 1.3388259011117424e-05, + "loss": 0.1825, + "step": 18002, + "teacher_loss": 0.18148615956306458 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.10029195249080658, + "learning_rate": 1.3386000929129425e-05, + "loss": 0.1726, + "step": 18003, + "teacher_loss": 0.1805967390537262 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.4660463035106659, + "learning_rate": 1.3383742884145584e-05, + "loss": 0.217, + "step": 18004, + "teacher_loss": 0.18936866521835327 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.27851828932762146, + "learning_rate": 1.3381484876217669e-05, + "loss": 0.1678, + "step": 18005, + "teacher_loss": 0.15549078583717346 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.5409957766532898, + "learning_rate": 1.3379226905397456e-05, + "loss": 0.217, + "step": 18006, + "teacher_loss": 0.18100020289421082 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.3592095375061035, + "learning_rate": 1.337696897173671e-05, + "loss": 0.289, + "step": 18007, + "teacher_loss": 0.28115957975387573 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.3841264843940735, + "learning_rate": 1.3374711075287198e-05, + "loss": 0.2631, + "step": 18008, + "teacher_loss": 0.2496688961982727 + }, + { + "compression_loss": 0.0, + "epoch": 3.25, + "label_loss": 0.34814420342445374, + "learning_rate": 1.337245321610069e-05, + "loss": 0.2301, + "step": 18009, + "teacher_loss": 0.21702076494693756 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.46644219756126404, + "learning_rate": 1.3370195394228952e-05, + "loss": 0.2608, + "step": 18010, + "teacher_loss": 0.2379295378923416 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.1959199607372284, + "learning_rate": 1.3367937609723749e-05, + "loss": 0.1964, + "step": 18011, + "teacher_loss": 0.19647538661956787 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.5666648149490356, + "learning_rate": 1.3365679862636833e-05, + "loss": 0.4057, + "step": 18012, + "teacher_loss": 0.3877698481082916 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.6380306482315063, + "learning_rate": 1.3363422153019988e-05, + "loss": 0.2272, + "step": 18013, + "teacher_loss": 0.18155093491077423 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.3806297779083252, + "learning_rate": 1.3361164480924965e-05, + "loss": 0.2581, + "step": 18014, + "teacher_loss": 0.24453939497470856 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.2186947762966156, + "learning_rate": 1.3358906846403522e-05, + "loss": 0.2245, + "step": 18015, + "teacher_loss": 0.2251451015472412 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.39550304412841797, + "learning_rate": 1.3356649249507428e-05, + "loss": 0.2833, + "step": 18016, + "teacher_loss": 0.2707808315753937 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 1.2079740762710571, + "learning_rate": 1.3354391690288439e-05, + "loss": 0.2558, + "step": 18017, + "teacher_loss": 0.15005674958229065 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.4492696523666382, + "learning_rate": 1.3352134168798311e-05, + "loss": 0.2227, + "step": 18018, + "teacher_loss": 0.19756457209587097 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.2646927833557129, + "learning_rate": 1.3349876685088811e-05, + "loss": 0.2221, + "step": 18019, + "teacher_loss": 0.21738159656524658 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.4837474226951599, + "learning_rate": 1.3347619239211692e-05, + "loss": 0.2339, + "step": 18020, + "teacher_loss": 0.2061486840248108 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.3486769199371338, + "learning_rate": 1.3345361831218704e-05, + "loss": 0.1858, + "step": 18021, + "teacher_loss": 0.1677398681640625 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.7297282218933105, + "learning_rate": 1.3343104461161613e-05, + "loss": 0.2969, + "step": 18022, + "teacher_loss": 0.2487574815750122 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.5716339945793152, + "learning_rate": 1.3340847129092173e-05, + "loss": 0.4358, + "step": 18023, + "teacher_loss": 0.42072850465774536 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.5458360910415649, + "learning_rate": 1.3338589835062123e-05, + "loss": 0.3033, + "step": 18024, + "teacher_loss": 0.2763892412185669 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.22596397995948792, + "learning_rate": 1.3336332579123238e-05, + "loss": 0.2007, + "step": 18025, + "teacher_loss": 0.19794821739196777 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.9249753952026367, + "learning_rate": 1.3334075361327254e-05, + "loss": 0.3378, + "step": 18026, + "teacher_loss": 0.2726061940193176 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.451119065284729, + "learning_rate": 1.333181818172592e-05, + "loss": 0.2331, + "step": 18027, + "teacher_loss": 0.20892536640167236 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.35619962215423584, + "learning_rate": 1.3329561040371007e-05, + "loss": 0.2347, + "step": 18028, + "teacher_loss": 0.22119031846523285 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.6137403249740601, + "learning_rate": 1.3327303937314247e-05, + "loss": 0.2654, + "step": 18029, + "teacher_loss": 0.2267158329486847 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.6352120637893677, + "learning_rate": 1.3325046872607387e-05, + "loss": 0.2759, + "step": 18030, + "teacher_loss": 0.23595969378948212 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.5605087280273438, + "learning_rate": 1.3322789846302185e-05, + "loss": 0.2235, + "step": 18031, + "teacher_loss": 0.1860453486442566 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.28998863697052, + "learning_rate": 1.3320532858450382e-05, + "loss": 0.1719, + "step": 18032, + "teacher_loss": 0.15875372290611267 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.5532456636428833, + "learning_rate": 1.3318275909103727e-05, + "loss": 0.23, + "step": 18033, + "teacher_loss": 0.19406282901763916 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.3889275789260864, + "learning_rate": 1.3316018998313962e-05, + "loss": 0.193, + "step": 18034, + "teacher_loss": 0.17122645676136017 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.2096666395664215, + "learning_rate": 1.3313762126132832e-05, + "loss": 0.2132, + "step": 18035, + "teacher_loss": 0.21355712413787842 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.4629286229610443, + "learning_rate": 1.3311505292612085e-05, + "loss": 0.345, + "step": 18036, + "teacher_loss": 0.33191320300102234 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.2919420003890991, + "learning_rate": 1.3309248497803451e-05, + "loss": 0.1719, + "step": 18037, + "teacher_loss": 0.1585262417793274 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.5541933178901672, + "learning_rate": 1.3306991741758689e-05, + "loss": 0.6016, + "step": 18038, + "teacher_loss": 0.606877326965332 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.33001285791397095, + "learning_rate": 1.3304735024529526e-05, + "loss": 0.2032, + "step": 18039, + "teacher_loss": 0.18911322951316833 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.14874063432216644, + "learning_rate": 1.3302478346167703e-05, + "loss": 0.1585, + "step": 18040, + "teacher_loss": 0.15957880020141602 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.6319202780723572, + "learning_rate": 1.3300221706724966e-05, + "loss": 0.229, + "step": 18041, + "teacher_loss": 0.1842707246541977 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.634954035282135, + "learning_rate": 1.329796510625305e-05, + "loss": 0.4518, + "step": 18042, + "teacher_loss": 0.43140947818756104 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.32245922088623047, + "learning_rate": 1.3295708544803688e-05, + "loss": 0.2035, + "step": 18043, + "teacher_loss": 0.19022925198078156 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.270362913608551, + "learning_rate": 1.329345202242862e-05, + "loss": 0.1868, + "step": 18044, + "teacher_loss": 0.17756280303001404 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.4343642294406891, + "learning_rate": 1.3291195539179584e-05, + "loss": 0.2687, + "step": 18045, + "teacher_loss": 0.2502707242965698 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.42184072732925415, + "learning_rate": 1.3288939095108306e-05, + "loss": 0.1719, + "step": 18046, + "teacher_loss": 0.14414390921592712 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.37064820528030396, + "learning_rate": 1.3286682690266527e-05, + "loss": 0.1962, + "step": 18047, + "teacher_loss": 0.1768346130847931 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.40810659527778625, + "learning_rate": 1.3284426324705981e-05, + "loss": 0.1833, + "step": 18048, + "teacher_loss": 0.15836849808692932 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.9537204504013062, + "learning_rate": 1.3282169998478388e-05, + "loss": 0.2607, + "step": 18049, + "teacher_loss": 0.18374550342559814 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.6317575573921204, + "learning_rate": 1.3279913711635493e-05, + "loss": 0.2875, + "step": 18050, + "teacher_loss": 0.24921450018882751 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.4654926359653473, + "learning_rate": 1.3277657464229018e-05, + "loss": 0.2092, + "step": 18051, + "teacher_loss": 0.1807556450366974 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.9254947900772095, + "learning_rate": 1.3275401256310694e-05, + "loss": 0.3353, + "step": 18052, + "teacher_loss": 0.2697446048259735 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.711561918258667, + "learning_rate": 1.3273145087932243e-05, + "loss": 0.4599, + "step": 18053, + "teacher_loss": 0.4319247007369995 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.39307230710983276, + "learning_rate": 1.3270888959145402e-05, + "loss": 0.2834, + "step": 18054, + "teacher_loss": 0.27126815915107727 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.1702650785446167, + "learning_rate": 1.3268632870001895e-05, + "loss": 0.1807, + "step": 18055, + "teacher_loss": 0.1819145381450653 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.8414362668991089, + "learning_rate": 1.3266376820553442e-05, + "loss": 0.309, + "step": 18056, + "teacher_loss": 0.2498057782649994 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.8354657888412476, + "learning_rate": 1.3264120810851773e-05, + "loss": 0.2957, + "step": 18057, + "teacher_loss": 0.2356717586517334 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.5256901979446411, + "learning_rate": 1.3261864840948609e-05, + "loss": 0.2341, + "step": 18058, + "teacher_loss": 0.2017158716917038 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.3564150333404541, + "learning_rate": 1.325960891089567e-05, + "loss": 0.2669, + "step": 18059, + "teacher_loss": 0.2569480836391449 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.28577864170074463, + "learning_rate": 1.3257353020744685e-05, + "loss": 0.1658, + "step": 18060, + "teacher_loss": 0.15241412818431854 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.9722816944122314, + "learning_rate": 1.3255097170547371e-05, + "loss": 0.3675, + "step": 18061, + "teacher_loss": 0.30028021335601807 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.6382484436035156, + "learning_rate": 1.3252841360355441e-05, + "loss": 0.2562, + "step": 18062, + "teacher_loss": 0.2137523889541626 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.41152137517929077, + "learning_rate": 1.3250585590220627e-05, + "loss": 0.2049, + "step": 18063, + "teacher_loss": 0.18191435933113098 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.4244527816772461, + "learning_rate": 1.3248329860194639e-05, + "loss": 0.1799, + "step": 18064, + "teacher_loss": 0.15269345045089722 + }, + { + "compression_loss": 0.0, + "epoch": 3.26, + "label_loss": 0.3407462239265442, + "learning_rate": 1.3246074170329192e-05, + "loss": 0.2657, + "step": 18065, + "teacher_loss": 0.25739943981170654 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.39456138014793396, + "learning_rate": 1.3243818520676009e-05, + "loss": 0.2051, + "step": 18066, + "teacher_loss": 0.184077650308609 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.39879655838012695, + "learning_rate": 1.3241562911286803e-05, + "loss": 0.2359, + "step": 18067, + "teacher_loss": 0.21774812042713165 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.3525258004665375, + "learning_rate": 1.3239307342213282e-05, + "loss": 0.2211, + "step": 18068, + "teacher_loss": 0.20652933418750763 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.4380994439125061, + "learning_rate": 1.323705181350717e-05, + "loss": 0.177, + "step": 18069, + "teacher_loss": 0.1479840874671936 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.5991427898406982, + "learning_rate": 1.3234796325220174e-05, + "loss": 0.2933, + "step": 18070, + "teacher_loss": 0.25932633876800537 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.38682258129119873, + "learning_rate": 1.3232540877404e-05, + "loss": 0.1937, + "step": 18071, + "teacher_loss": 0.17227378487586975 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.4379361867904663, + "learning_rate": 1.323028547011037e-05, + "loss": 0.2606, + "step": 18072, + "teacher_loss": 0.24093812704086304 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.43991994857788086, + "learning_rate": 1.3228030103390993e-05, + "loss": 0.2285, + "step": 18073, + "teacher_loss": 0.20504266023635864 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.4691389203071594, + "learning_rate": 1.3225774777297569e-05, + "loss": 0.2483, + "step": 18074, + "teacher_loss": 0.2237095832824707 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.37046217918395996, + "learning_rate": 1.3223519491881806e-05, + "loss": 0.3211, + "step": 18075, + "teacher_loss": 0.31562644243240356 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.3690827488899231, + "learning_rate": 1.3221264247195417e-05, + "loss": 0.1889, + "step": 18076, + "teacher_loss": 0.16891750693321228 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.18976879119873047, + "learning_rate": 1.3219009043290107e-05, + "loss": 0.2171, + "step": 18077, + "teacher_loss": 0.22014687955379486 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.5652008652687073, + "learning_rate": 1.3216753880217577e-05, + "loss": 0.2329, + "step": 18078, + "teacher_loss": 0.19600054621696472 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.261024534702301, + "learning_rate": 1.3214498758029537e-05, + "loss": 0.2203, + "step": 18079, + "teacher_loss": 0.21579015254974365 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.32064616680145264, + "learning_rate": 1.3212243676777686e-05, + "loss": 0.2259, + "step": 18080, + "teacher_loss": 0.21533358097076416 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.6935408115386963, + "learning_rate": 1.3209988636513728e-05, + "loss": 0.2802, + "step": 18081, + "teacher_loss": 0.23425620794296265 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.2539622485637665, + "learning_rate": 1.3207733637289364e-05, + "loss": 0.1828, + "step": 18082, + "teacher_loss": 0.1749408096075058 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.3459761142730713, + "learning_rate": 1.3205478679156295e-05, + "loss": 0.2594, + "step": 18083, + "teacher_loss": 0.2497342824935913 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.41030436754226685, + "learning_rate": 1.3203223762166216e-05, + "loss": 0.1779, + "step": 18084, + "teacher_loss": 0.15209606289863586 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.2116227149963379, + "learning_rate": 1.3200968886370833e-05, + "loss": 0.2152, + "step": 18085, + "teacher_loss": 0.21561098098754883 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.24857468903064728, + "learning_rate": 1.3198714051821843e-05, + "loss": 0.2373, + "step": 18086, + "teacher_loss": 0.2360607236623764 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.4978814721107483, + "learning_rate": 1.3196459258570928e-05, + "loss": 0.306, + "step": 18087, + "teacher_loss": 0.2846578359603882 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.8321453332901001, + "learning_rate": 1.3194204506669806e-05, + "loss": 0.2858, + "step": 18088, + "teacher_loss": 0.22508911788463593 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.45448440313339233, + "learning_rate": 1.3191949796170156e-05, + "loss": 0.2183, + "step": 18089, + "teacher_loss": 0.19203674793243408 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.5713562965393066, + "learning_rate": 1.3189695127123675e-05, + "loss": 0.257, + "step": 18090, + "teacher_loss": 0.222085103392601 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 1.038316011428833, + "learning_rate": 1.318744049958206e-05, + "loss": 0.3771, + "step": 18091, + "teacher_loss": 0.30360138416290283 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.2662854790687561, + "learning_rate": 1.3185185913596999e-05, + "loss": 0.2041, + "step": 18092, + "teacher_loss": 0.19720464944839478 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.2641443610191345, + "learning_rate": 1.3182931369220181e-05, + "loss": 0.2097, + "step": 18093, + "teacher_loss": 0.20367072522640228 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.24942860007286072, + "learning_rate": 1.3180676866503304e-05, + "loss": 0.2164, + "step": 18094, + "teacher_loss": 0.21272876858711243 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.7187564969062805, + "learning_rate": 1.317842240549805e-05, + "loss": 0.2844, + "step": 18095, + "teacher_loss": 0.2361488789319992 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.539593517780304, + "learning_rate": 1.3176167986256111e-05, + "loss": 0.2592, + "step": 18096, + "teacher_loss": 0.228027805685997 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.19185474514961243, + "learning_rate": 1.3173913608829164e-05, + "loss": 0.203, + "step": 18097, + "teacher_loss": 0.2042843997478485 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.6059845089912415, + "learning_rate": 1.3171659273268913e-05, + "loss": 0.406, + "step": 18098, + "teacher_loss": 0.3837599754333496 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.35299962759017944, + "learning_rate": 1.3169404979627028e-05, + "loss": 0.2136, + "step": 18099, + "teacher_loss": 0.1980898529291153 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.18739306926727295, + "learning_rate": 1.3167150727955199e-05, + "loss": 0.1745, + "step": 18100, + "teacher_loss": 0.1730639934539795 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.7790427207946777, + "learning_rate": 1.3164896518305107e-05, + "loss": 0.2996, + "step": 18101, + "teacher_loss": 0.24631893634796143 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.26252225041389465, + "learning_rate": 1.3162642350728439e-05, + "loss": 0.2028, + "step": 18102, + "teacher_loss": 0.19619476795196533 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.5420483946800232, + "learning_rate": 1.316038822527687e-05, + "loss": 0.2238, + "step": 18103, + "teacher_loss": 0.1884036362171173 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.2817770838737488, + "learning_rate": 1.3158134142002085e-05, + "loss": 0.1499, + "step": 18104, + "teacher_loss": 0.13524103164672852 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.6387237310409546, + "learning_rate": 1.3155880100955764e-05, + "loss": 0.2261, + "step": 18105, + "teacher_loss": 0.1802579164505005 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.17957574129104614, + "learning_rate": 1.3153626102189578e-05, + "loss": 0.1716, + "step": 18106, + "teacher_loss": 0.1706923395395279 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.17800748348236084, + "learning_rate": 1.3151372145755215e-05, + "loss": 0.2106, + "step": 18107, + "teacher_loss": 0.2142239212989807 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.590187132358551, + "learning_rate": 1.3149118231704345e-05, + "loss": 0.2794, + "step": 18108, + "teacher_loss": 0.24484902620315552 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.38905492424964905, + "learning_rate": 1.3146864360088642e-05, + "loss": 0.1588, + "step": 18109, + "teacher_loss": 0.13320091366767883 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.2505139112472534, + "learning_rate": 1.3144610530959784e-05, + "loss": 0.2292, + "step": 18110, + "teacher_loss": 0.22679060697555542 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.3953123390674591, + "learning_rate": 1.314235674436945e-05, + "loss": 0.2419, + "step": 18111, + "teacher_loss": 0.22482754290103912 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.19311776757240295, + "learning_rate": 1.3140103000369296e-05, + "loss": 0.1627, + "step": 18112, + "teacher_loss": 0.15927964448928833 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.6625838279724121, + "learning_rate": 1.3137849299011015e-05, + "loss": 0.3446, + "step": 18113, + "teacher_loss": 0.3092654347419739 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.46393102407455444, + "learning_rate": 1.313559564034626e-05, + "loss": 0.3801, + "step": 18114, + "teacher_loss": 0.3708367645740509 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.40326282382011414, + "learning_rate": 1.313334202442671e-05, + "loss": 0.2832, + "step": 18115, + "teacher_loss": 0.26991456747055054 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.40717148780822754, + "learning_rate": 1.3131088451304026e-05, + "loss": 0.3281, + "step": 18116, + "teacher_loss": 0.31928497552871704 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.4894641041755676, + "learning_rate": 1.3128834921029885e-05, + "loss": 0.1978, + "step": 18117, + "teacher_loss": 0.16542458534240723 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.5823531150817871, + "learning_rate": 1.3126581433655948e-05, + "loss": 0.2515, + "step": 18118, + "teacher_loss": 0.21468724310398102 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.5762792825698853, + "learning_rate": 1.312432798923388e-05, + "loss": 0.2399, + "step": 18119, + "teacher_loss": 0.202561616897583 + }, + { + "compression_loss": 0.0, + "epoch": 3.27, + "label_loss": 0.5645594596862793, + "learning_rate": 1.312207458781535e-05, + "loss": 0.3304, + "step": 18120, + "teacher_loss": 0.3044322729110718 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.3128772974014282, + "learning_rate": 1.3119821229452023e-05, + "loss": 0.1724, + "step": 18121, + "teacher_loss": 0.15684494376182556 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.3212696313858032, + "learning_rate": 1.311756791419555e-05, + "loss": 0.1912, + "step": 18122, + "teacher_loss": 0.17673750221729279 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.28454113006591797, + "learning_rate": 1.3115314642097606e-05, + "loss": 0.2528, + "step": 18123, + "teacher_loss": 0.2492837905883789 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.5250230431556702, + "learning_rate": 1.3113061413209848e-05, + "loss": 0.2436, + "step": 18124, + "teacher_loss": 0.212283656001091 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.3426131010055542, + "learning_rate": 1.3110808227583924e-05, + "loss": 0.2314, + "step": 18125, + "teacher_loss": 0.21906372904777527 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.2884938716888428, + "learning_rate": 1.3108555085271517e-05, + "loss": 0.3581, + "step": 18126, + "teacher_loss": 0.36588048934936523 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.1889309287071228, + "learning_rate": 1.3106301986324266e-05, + "loss": 0.2166, + "step": 18127, + "teacher_loss": 0.21964265406131744 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.4153388738632202, + "learning_rate": 1.310404893079383e-05, + "loss": 0.2687, + "step": 18128, + "teacher_loss": 0.2523530423641205 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.4441133141517639, + "learning_rate": 1.3101795918731869e-05, + "loss": 0.2803, + "step": 18129, + "teacher_loss": 0.2621369957923889 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.3501591682434082, + "learning_rate": 1.309954295019004e-05, + "loss": 0.1674, + "step": 18130, + "teacher_loss": 0.1470591425895691 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.8556177020072937, + "learning_rate": 1.3097290025219986e-05, + "loss": 0.3238, + "step": 18131, + "teacher_loss": 0.26474615931510925 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.4137946367263794, + "learning_rate": 1.3095037143873375e-05, + "loss": 0.2505, + "step": 18132, + "teacher_loss": 0.23240438103675842 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.2501292824745178, + "learning_rate": 1.3092784306201847e-05, + "loss": 0.2498, + "step": 18133, + "teacher_loss": 0.24974240362644196 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.3543018698692322, + "learning_rate": 1.3090531512257055e-05, + "loss": 0.2388, + "step": 18134, + "teacher_loss": 0.22598996758460999 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.5683243274688721, + "learning_rate": 1.3088278762090653e-05, + "loss": 0.233, + "step": 18135, + "teacher_loss": 0.19579479098320007 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.503743052482605, + "learning_rate": 1.3086026055754293e-05, + "loss": 0.2408, + "step": 18136, + "teacher_loss": 0.2115417867898941 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.2990487217903137, + "learning_rate": 1.3083773393299611e-05, + "loss": 0.1696, + "step": 18137, + "teacher_loss": 0.1552715301513672 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.46824532747268677, + "learning_rate": 1.3081520774778258e-05, + "loss": 0.1855, + "step": 18138, + "teacher_loss": 0.15413513779640198 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.25309228897094727, + "learning_rate": 1.3079268200241885e-05, + "loss": 0.1865, + "step": 18139, + "teacher_loss": 0.17913636565208435 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.5562655925750732, + "learning_rate": 1.3077015669742132e-05, + "loss": 0.3, + "step": 18140, + "teacher_loss": 0.2715657651424408 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.6625136137008667, + "learning_rate": 1.3074763183330643e-05, + "loss": 0.2776, + "step": 18141, + "teacher_loss": 0.23487672209739685 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.4585106372833252, + "learning_rate": 1.3072510741059064e-05, + "loss": 0.2499, + "step": 18142, + "teacher_loss": 0.2267199456691742 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.2499270737171173, + "learning_rate": 1.3070258342979035e-05, + "loss": 0.2552, + "step": 18143, + "teacher_loss": 0.2557613253593445 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.9610921740531921, + "learning_rate": 1.3068005989142195e-05, + "loss": 0.2526, + "step": 18144, + "teacher_loss": 0.17383532226085663 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.22878341376781464, + "learning_rate": 1.3065753679600186e-05, + "loss": 0.1976, + "step": 18145, + "teacher_loss": 0.1941266804933548 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.09850089251995087, + "learning_rate": 1.3063501414404651e-05, + "loss": 0.1395, + "step": 18146, + "teacher_loss": 0.14404058456420898 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.32071763277053833, + "learning_rate": 1.3061249193607214e-05, + "loss": 0.188, + "step": 18147, + "teacher_loss": 0.17330355942249298 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.655463695526123, + "learning_rate": 1.3058997017259526e-05, + "loss": 0.4159, + "step": 18148, + "teacher_loss": 0.38927972316741943 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.523260772228241, + "learning_rate": 1.3056744885413216e-05, + "loss": 0.2105, + "step": 18149, + "teacher_loss": 0.17572741210460663 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 1.012594223022461, + "learning_rate": 1.3054492798119915e-05, + "loss": 0.4024, + "step": 18150, + "teacher_loss": 0.3345867395401001 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.23949134349822998, + "learning_rate": 1.3052240755431268e-05, + "loss": 0.2107, + "step": 18151, + "teacher_loss": 0.20754443109035492 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.37489616870880127, + "learning_rate": 1.3049988757398898e-05, + "loss": 0.2046, + "step": 18152, + "teacher_loss": 0.1856551170349121 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.614890456199646, + "learning_rate": 1.3047736804074439e-05, + "loss": 0.1975, + "step": 18153, + "teacher_loss": 0.1511019915342331 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.6322044134140015, + "learning_rate": 1.304548489550952e-05, + "loss": 0.283, + "step": 18154, + "teacher_loss": 0.24421021342277527 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.5281765460968018, + "learning_rate": 1.3043233031755778e-05, + "loss": 0.2514, + "step": 18155, + "teacher_loss": 0.22064149379730225 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.481297105550766, + "learning_rate": 1.3040981212864832e-05, + "loss": 0.2876, + "step": 18156, + "teacher_loss": 0.2661042809486389 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.23492732644081116, + "learning_rate": 1.3038729438888311e-05, + "loss": 0.2415, + "step": 18157, + "teacher_loss": 0.2422710657119751 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.2689550220966339, + "learning_rate": 1.3036477709877849e-05, + "loss": 0.209, + "step": 18158, + "teacher_loss": 0.20228593051433563 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.4091396927833557, + "learning_rate": 1.3034226025885067e-05, + "loss": 0.3432, + "step": 18159, + "teacher_loss": 0.33587849140167236 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 1.1813099384307861, + "learning_rate": 1.3031974386961578e-05, + "loss": 0.3229, + "step": 18160, + "teacher_loss": 0.22749581933021545 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.5107371211051941, + "learning_rate": 1.302972279315903e-05, + "loss": 0.3707, + "step": 18161, + "teacher_loss": 0.3551744222640991 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.3317374885082245, + "learning_rate": 1.3027471244529023e-05, + "loss": 0.1907, + "step": 18162, + "teacher_loss": 0.175077423453331 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.8840545415878296, + "learning_rate": 1.3025219741123185e-05, + "loss": 0.3057, + "step": 18163, + "teacher_loss": 0.24141347408294678 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.510932981967926, + "learning_rate": 1.3022968282993143e-05, + "loss": 0.1835, + "step": 18164, + "teacher_loss": 0.14714449644088745 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.285107284784317, + "learning_rate": 1.3020716870190507e-05, + "loss": 0.1627, + "step": 18165, + "teacher_loss": 0.14914417266845703 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.5745794177055359, + "learning_rate": 1.3018465502766899e-05, + "loss": 0.2214, + "step": 18166, + "teacher_loss": 0.18211030960083008 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.5230456590652466, + "learning_rate": 1.3016214180773937e-05, + "loss": 0.2142, + "step": 18167, + "teacher_loss": 0.1799221783876419 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.40999191999435425, + "learning_rate": 1.3013962904263237e-05, + "loss": 0.286, + "step": 18168, + "teacher_loss": 0.2722364664077759 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.6786731481552124, + "learning_rate": 1.3011711673286411e-05, + "loss": 0.2253, + "step": 18169, + "teacher_loss": 0.1748993843793869 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.20154336094856262, + "learning_rate": 1.3009460487895078e-05, + "loss": 0.3155, + "step": 18170, + "teacher_loss": 0.32811373472213745 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.21616467833518982, + "learning_rate": 1.3007209348140849e-05, + "loss": 0.1604, + "step": 18171, + "teacher_loss": 0.15423047542572021 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.4753044545650482, + "learning_rate": 1.3004958254075329e-05, + "loss": 0.23, + "step": 18172, + "teacher_loss": 0.20273320376873016 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.28975003957748413, + "learning_rate": 1.3002707205750142e-05, + "loss": 0.3469, + "step": 18173, + "teacher_loss": 0.35327431559562683 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.3585415482521057, + "learning_rate": 1.3000456203216887e-05, + "loss": 0.2133, + "step": 18174, + "teacher_loss": 0.19721579551696777 + }, + { + "compression_loss": 0.0, + "epoch": 3.28, + "label_loss": 0.8126368522644043, + "learning_rate": 1.2998205246527171e-05, + "loss": 0.6232, + "step": 18175, + "teacher_loss": 0.6021868586540222 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.4043880105018616, + "learning_rate": 1.2995954335732615e-05, + "loss": 0.3485, + "step": 18176, + "teacher_loss": 0.3422878384590149 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.2915797233581543, + "learning_rate": 1.2993703470884818e-05, + "loss": 0.1848, + "step": 18177, + "teacher_loss": 0.17288590967655182 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.25596755743026733, + "learning_rate": 1.2991452652035383e-05, + "loss": 0.1618, + "step": 18178, + "teacher_loss": 0.15136650204658508 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.3866546154022217, + "learning_rate": 1.2989201879235912e-05, + "loss": 0.2673, + "step": 18179, + "teacher_loss": 0.25404810905456543 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.667992115020752, + "learning_rate": 1.2986951152538019e-05, + "loss": 0.3579, + "step": 18180, + "teacher_loss": 0.32343828678131104 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.12965041399002075, + "learning_rate": 1.2984700471993299e-05, + "loss": 0.213, + "step": 18181, + "teacher_loss": 0.22230014204978943 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.4179753065109253, + "learning_rate": 1.2982449837653354e-05, + "loss": 0.2423, + "step": 18182, + "teacher_loss": 0.22282329201698303 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.5086942911148071, + "learning_rate": 1.2980199249569785e-05, + "loss": 0.218, + "step": 18183, + "teacher_loss": 0.18568812310695648 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.9095141887664795, + "learning_rate": 1.2977948707794196e-05, + "loss": 0.3201, + "step": 18184, + "teacher_loss": 0.25462767481803894 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.5382750034332275, + "learning_rate": 1.2975698212378173e-05, + "loss": 0.3276, + "step": 18185, + "teacher_loss": 0.30420225858688354 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.5130690336227417, + "learning_rate": 1.297344776337333e-05, + "loss": 0.2472, + "step": 18186, + "teacher_loss": 0.2176552563905716 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.6596359610557556, + "learning_rate": 1.2971197360831249e-05, + "loss": 0.476, + "step": 18187, + "teacher_loss": 0.4556193947792053 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.3123575448989868, + "learning_rate": 1.2968947004803526e-05, + "loss": 0.1933, + "step": 18188, + "teacher_loss": 0.18007208406925201 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.9957927465438843, + "learning_rate": 1.2966696695341764e-05, + "loss": 0.3092, + "step": 18189, + "teacher_loss": 0.23295804858207703 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.29672303795814514, + "learning_rate": 1.296444643249755e-05, + "loss": 0.1879, + "step": 18190, + "teacher_loss": 0.17577509582042694 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.6289877891540527, + "learning_rate": 1.2962196216322474e-05, + "loss": 0.2154, + "step": 18191, + "teacher_loss": 0.1694399118423462 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.12931294739246368, + "learning_rate": 1.295994604686813e-05, + "loss": 0.1475, + "step": 18192, + "teacher_loss": 0.14950107038021088 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.4452768564224243, + "learning_rate": 1.295769592418611e-05, + "loss": 0.3163, + "step": 18193, + "teacher_loss": 0.30198732018470764 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.2548239231109619, + "learning_rate": 1.2955445848327994e-05, + "loss": 0.2077, + "step": 18194, + "teacher_loss": 0.2024780809879303 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.41416066884994507, + "learning_rate": 1.2953195819345378e-05, + "loss": 0.2907, + "step": 18195, + "teacher_loss": 0.27695387601852417 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.29538631439208984, + "learning_rate": 1.2950945837289849e-05, + "loss": 0.1894, + "step": 18196, + "teacher_loss": 0.1775709092617035 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.47110238671302795, + "learning_rate": 1.2948695902212978e-05, + "loss": 0.2123, + "step": 18197, + "teacher_loss": 0.1835511028766632 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.5581475496292114, + "learning_rate": 1.2946446014166371e-05, + "loss": 0.2312, + "step": 18198, + "teacher_loss": 0.19492009282112122 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.39313197135925293, + "learning_rate": 1.2944196173201596e-05, + "loss": 0.2535, + "step": 18199, + "teacher_loss": 0.23802423477172852 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.1593824028968811, + "learning_rate": 1.294194637937024e-05, + "loss": 0.1766, + "step": 18200, + "teacher_loss": 0.17854812741279602 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.33885809779167175, + "learning_rate": 1.2939696632723877e-05, + "loss": 0.2406, + "step": 18201, + "teacher_loss": 0.22966915369033813 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.4542231857776642, + "learning_rate": 1.29374469333141e-05, + "loss": 0.2598, + "step": 18202, + "teacher_loss": 0.23821038007736206 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.6547744274139404, + "learning_rate": 1.293519728119248e-05, + "loss": 0.2471, + "step": 18203, + "teacher_loss": 0.20182767510414124 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.2911924421787262, + "learning_rate": 1.293294767641059e-05, + "loss": 0.173, + "step": 18204, + "teacher_loss": 0.15986818075180054 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.2568390667438507, + "learning_rate": 1.2930698119020017e-05, + "loss": 0.1736, + "step": 18205, + "teacher_loss": 0.16432306170463562 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.4197365641593933, + "learning_rate": 1.2928448609072335e-05, + "loss": 0.2282, + "step": 18206, + "teacher_loss": 0.20687752962112427 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.23294541239738464, + "learning_rate": 1.292619914661911e-05, + "loss": 0.1677, + "step": 18207, + "teacher_loss": 0.16040048003196716 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.19515679776668549, + "learning_rate": 1.2923949731711925e-05, + "loss": 0.1699, + "step": 18208, + "teacher_loss": 0.16708455979824066 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.25613880157470703, + "learning_rate": 1.292170036440235e-05, + "loss": 0.2256, + "step": 18209, + "teacher_loss": 0.22223928570747375 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.32962068915367126, + "learning_rate": 1.2919451044741945e-05, + "loss": 0.1718, + "step": 18210, + "teacher_loss": 0.15424056351184845 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.37333303689956665, + "learning_rate": 1.29172017727823e-05, + "loss": 0.203, + "step": 18211, + "teacher_loss": 0.1840239018201828 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.31904950737953186, + "learning_rate": 1.291495254857497e-05, + "loss": 0.2049, + "step": 18212, + "teacher_loss": 0.19222334027290344 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.2840752899646759, + "learning_rate": 1.2912703372171524e-05, + "loss": 0.1784, + "step": 18213, + "teacher_loss": 0.16664424538612366 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.5742809772491455, + "learning_rate": 1.2910454243623537e-05, + "loss": 0.2592, + "step": 18214, + "teacher_loss": 0.2242123931646347 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.20950889587402344, + "learning_rate": 1.2908205162982568e-05, + "loss": 0.2758, + "step": 18215, + "teacher_loss": 0.2831608057022095 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.28280192613601685, + "learning_rate": 1.2905956130300179e-05, + "loss": 0.1847, + "step": 18216, + "teacher_loss": 0.17380356788635254 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.22982732951641083, + "learning_rate": 1.290370714562794e-05, + "loss": 0.1681, + "step": 18217, + "teacher_loss": 0.16121140122413635 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.3928210735321045, + "learning_rate": 1.2901458209017413e-05, + "loss": 0.2426, + "step": 18218, + "teacher_loss": 0.22593817114830017 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.4701545834541321, + "learning_rate": 1.2899209320520159e-05, + "loss": 0.251, + "step": 18219, + "teacher_loss": 0.22662797570228577 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.14719830453395844, + "learning_rate": 1.2896960480187727e-05, + "loss": 0.243, + "step": 18220, + "teacher_loss": 0.25367259979248047 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.3538978397846222, + "learning_rate": 1.2894711688071698e-05, + "loss": 0.3386, + "step": 18221, + "teacher_loss": 0.3369472622871399 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.28509244322776794, + "learning_rate": 1.2892462944223613e-05, + "loss": 0.1747, + "step": 18222, + "teacher_loss": 0.1623988151550293 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.8829365968704224, + "learning_rate": 1.2890214248695032e-05, + "loss": 0.3098, + "step": 18223, + "teacher_loss": 0.24607492983341217 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.3881641626358032, + "learning_rate": 1.2887965601537515e-05, + "loss": 0.2366, + "step": 18224, + "teacher_loss": 0.2197500765323639 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.44385743141174316, + "learning_rate": 1.2885717002802616e-05, + "loss": 0.2897, + "step": 18225, + "teacher_loss": 0.2725197374820709 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.738135576248169, + "learning_rate": 1.288346845254188e-05, + "loss": 0.2768, + "step": 18226, + "teacher_loss": 0.22550088167190552 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.30323755741119385, + "learning_rate": 1.2881219950806875e-05, + "loss": 0.211, + "step": 18227, + "teacher_loss": 0.20074941217899323 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.9332331418991089, + "learning_rate": 1.2878971497649142e-05, + "loss": 0.2761, + "step": 18228, + "teacher_loss": 0.2030966579914093 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.4225686192512512, + "learning_rate": 1.287672309312023e-05, + "loss": 0.3973, + "step": 18229, + "teacher_loss": 0.3944445252418518 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.26022791862487793, + "learning_rate": 1.2874474737271695e-05, + "loss": 0.2484, + "step": 18230, + "teacher_loss": 0.2470693588256836 + }, + { + "compression_loss": 0.0, + "epoch": 3.29, + "label_loss": 0.22164443135261536, + "learning_rate": 1.287222643015508e-05, + "loss": 0.1394, + "step": 18231, + "teacher_loss": 0.13028313219547272 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.3618132174015045, + "learning_rate": 1.2869978171821933e-05, + "loss": 0.2839, + "step": 18232, + "teacher_loss": 0.2752948999404907 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.9462795853614807, + "learning_rate": 1.2867729962323803e-05, + "loss": 0.3064, + "step": 18233, + "teacher_loss": 0.23527707159519196 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.3360162377357483, + "learning_rate": 1.2865481801712235e-05, + "loss": 0.1706, + "step": 18234, + "teacher_loss": 0.15221315622329712 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.28276145458221436, + "learning_rate": 1.286323369003876e-05, + "loss": 0.2206, + "step": 18235, + "teacher_loss": 0.21366214752197266 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.5406668782234192, + "learning_rate": 1.2860985627354939e-05, + "loss": 0.2928, + "step": 18236, + "teacher_loss": 0.26523077487945557 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.42783766984939575, + "learning_rate": 1.2858737613712302e-05, + "loss": 0.1918, + "step": 18237, + "teacher_loss": 0.1655229926109314 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.7270276546478271, + "learning_rate": 1.2856489649162386e-05, + "loss": 0.4084, + "step": 18238, + "teacher_loss": 0.3730219602584839 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.21659454703330994, + "learning_rate": 1.2854241733756741e-05, + "loss": 0.1323, + "step": 18239, + "teacher_loss": 0.12296003103256226 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.7499873042106628, + "learning_rate": 1.2851993867546902e-05, + "loss": 0.3206, + "step": 18240, + "teacher_loss": 0.272857129573822 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.5469962954521179, + "learning_rate": 1.2849746050584402e-05, + "loss": 0.342, + "step": 18241, + "teacher_loss": 0.3191797137260437 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.24198998510837555, + "learning_rate": 1.2847498282920773e-05, + "loss": 0.3538, + "step": 18242, + "teacher_loss": 0.36617863178253174 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.6941623687744141, + "learning_rate": 1.284525056460756e-05, + "loss": 0.4705, + "step": 18243, + "teacher_loss": 0.44569897651672363 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.8404003381729126, + "learning_rate": 1.2843002895696294e-05, + "loss": 0.2617, + "step": 18244, + "teacher_loss": 0.19744521379470825 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.4416939914226532, + "learning_rate": 1.2840755276238494e-05, + "loss": 0.3449, + "step": 18245, + "teacher_loss": 0.33420050144195557 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.2236153781414032, + "learning_rate": 1.2838507706285712e-05, + "loss": 0.2365, + "step": 18246, + "teacher_loss": 0.2379852682352066 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.203488290309906, + "learning_rate": 1.2836260185889466e-05, + "loss": 0.1725, + "step": 18247, + "teacher_loss": 0.1690545380115509 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.37065887451171875, + "learning_rate": 1.283401271510128e-05, + "loss": 0.2925, + "step": 18248, + "teacher_loss": 0.28385210037231445 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.5756394267082214, + "learning_rate": 1.2831765293972695e-05, + "loss": 0.2472, + "step": 18249, + "teacher_loss": 0.21070127189159393 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.14653512835502625, + "learning_rate": 1.2829517922555233e-05, + "loss": 0.1453, + "step": 18250, + "teacher_loss": 0.14512351155281067 + }, + { + "epoch": 3.3, + "eval_exact_match": 80.00946073793756, + "eval_f1": 87.31498454197734, + "step": 18250 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.35064953565597534, + "learning_rate": 1.282727060090041e-05, + "loss": 0.2029, + "step": 18251, + "teacher_loss": 0.186465322971344 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.5890791416168213, + "learning_rate": 1.2825023329059764e-05, + "loss": 0.2389, + "step": 18252, + "teacher_loss": 0.19997096061706543 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.6370557546615601, + "learning_rate": 1.282277610708481e-05, + "loss": 0.2301, + "step": 18253, + "teacher_loss": 0.18492162227630615 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.8886950016021729, + "learning_rate": 1.282052893502707e-05, + "loss": 0.309, + "step": 18254, + "teacher_loss": 0.2446364164352417 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.750601053237915, + "learning_rate": 1.281828181293807e-05, + "loss": 0.319, + "step": 18255, + "teacher_loss": 0.2710520625114441 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.4397768974304199, + "learning_rate": 1.281603474086933e-05, + "loss": 0.2175, + "step": 18256, + "teacher_loss": 0.19277824461460114 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.5954399108886719, + "learning_rate": 1.2813787718872358e-05, + "loss": 0.1936, + "step": 18257, + "teacher_loss": 0.14895348250865936 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.4053216576576233, + "learning_rate": 1.2811540746998683e-05, + "loss": 0.2253, + "step": 18258, + "teacher_loss": 0.2052481323480606 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.2893517017364502, + "learning_rate": 1.2809293825299822e-05, + "loss": 0.2267, + "step": 18259, + "teacher_loss": 0.21978828310966492 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.5441051721572876, + "learning_rate": 1.2807046953827282e-05, + "loss": 0.2301, + "step": 18260, + "teacher_loss": 0.19517385959625244 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.4789500832557678, + "learning_rate": 1.2804800132632576e-05, + "loss": 0.199, + "step": 18261, + "teacher_loss": 0.16787350177764893 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.4673159122467041, + "learning_rate": 1.2802553361767226e-05, + "loss": 0.2215, + "step": 18262, + "teacher_loss": 0.19424059987068176 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 1.1354711055755615, + "learning_rate": 1.2800306641282739e-05, + "loss": 0.3325, + "step": 18263, + "teacher_loss": 0.24322986602783203 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.5315986275672913, + "learning_rate": 1.2798059971230622e-05, + "loss": 0.2459, + "step": 18264, + "teacher_loss": 0.21417085826396942 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.27937644720077515, + "learning_rate": 1.279581335166239e-05, + "loss": 0.3183, + "step": 18265, + "teacher_loss": 0.3226276636123657 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.3810928165912628, + "learning_rate": 1.2793566782629552e-05, + "loss": 0.2505, + "step": 18266, + "teacher_loss": 0.2359771579504013 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.6993427872657776, + "learning_rate": 1.2791320264183607e-05, + "loss": 0.2301, + "step": 18267, + "teacher_loss": 0.17799808084964752 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.1814243048429489, + "learning_rate": 1.2789073796376071e-05, + "loss": 0.1558, + "step": 18268, + "teacher_loss": 0.15291723608970642 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.8807456493377686, + "learning_rate": 1.2786827379258446e-05, + "loss": 0.3443, + "step": 18269, + "teacher_loss": 0.28465864062309265 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.22938352823257446, + "learning_rate": 1.2784581012882227e-05, + "loss": 0.188, + "step": 18270, + "teacher_loss": 0.1834345906972885 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.8353972434997559, + "learning_rate": 1.2782334697298931e-05, + "loss": 0.254, + "step": 18271, + "teacher_loss": 0.18939417600631714 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.772862434387207, + "learning_rate": 1.2780088432560048e-05, + "loss": 0.2997, + "step": 18272, + "teacher_loss": 0.2471664696931839 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.49294817447662354, + "learning_rate": 1.2777842218717075e-05, + "loss": 0.2572, + "step": 18273, + "teacher_loss": 0.23105153441429138 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.2970609962940216, + "learning_rate": 1.2775596055821527e-05, + "loss": 0.1633, + "step": 18274, + "teacher_loss": 0.14845848083496094 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.27919793128967285, + "learning_rate": 1.277334994392489e-05, + "loss": 0.1756, + "step": 18275, + "teacher_loss": 0.1640544980764389 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 1.4128801822662354, + "learning_rate": 1.2771103883078658e-05, + "loss": 0.4013, + "step": 18276, + "teacher_loss": 0.2888622283935547 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.302116334438324, + "learning_rate": 1.2768857873334338e-05, + "loss": 0.2102, + "step": 18277, + "teacher_loss": 0.20002323389053345 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.33971232175827026, + "learning_rate": 1.2766611914743415e-05, + "loss": 0.2179, + "step": 18278, + "teacher_loss": 0.2043740302324295 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.4176620841026306, + "learning_rate": 1.2764366007357382e-05, + "loss": 0.1551, + "step": 18279, + "teacher_loss": 0.12589576840400696 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.559597373008728, + "learning_rate": 1.2762120151227737e-05, + "loss": 0.2546, + "step": 18280, + "teacher_loss": 0.22076496481895447 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.6185896396636963, + "learning_rate": 1.2759874346405967e-05, + "loss": 0.2561, + "step": 18281, + "teacher_loss": 0.21583634614944458 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.3272697329521179, + "learning_rate": 1.2757628592943568e-05, + "loss": 0.1968, + "step": 18282, + "teacher_loss": 0.18230679631233215 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.3319544792175293, + "learning_rate": 1.275538289089201e-05, + "loss": 0.3205, + "step": 18283, + "teacher_loss": 0.31917887926101685 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.1651412844657898, + "learning_rate": 1.2753137240302801e-05, + "loss": 0.1671, + "step": 18284, + "teacher_loss": 0.16733206808567047 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.16619503498077393, + "learning_rate": 1.2750891641227418e-05, + "loss": 0.1995, + "step": 18285, + "teacher_loss": 0.20316170156002045 + }, + { + "compression_loss": 0.0, + "epoch": 3.3, + "label_loss": 0.3740761876106262, + "learning_rate": 1.2748646093717342e-05, + "loss": 0.2256, + "step": 18286, + "teacher_loss": 0.20914016664028168 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.6385414004325867, + "learning_rate": 1.2746400597824066e-05, + "loss": 0.2711, + "step": 18287, + "teacher_loss": 0.23029184341430664 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.5312670469284058, + "learning_rate": 1.2744155153599065e-05, + "loss": 0.2433, + "step": 18288, + "teacher_loss": 0.21129730343818665 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.13044904172420502, + "learning_rate": 1.2741909761093822e-05, + "loss": 0.1406, + "step": 18289, + "teacher_loss": 0.1417396068572998 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.407657265663147, + "learning_rate": 1.273966442035982e-05, + "loss": 0.2388, + "step": 18290, + "teacher_loss": 0.22000998258590698 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.22546306252479553, + "learning_rate": 1.2737419131448537e-05, + "loss": 0.1509, + "step": 18291, + "teacher_loss": 0.14261513948440552 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.1789315640926361, + "learning_rate": 1.2735173894411445e-05, + "loss": 0.1487, + "step": 18292, + "teacher_loss": 0.14535734057426453 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.5600394010543823, + "learning_rate": 1.273292870930003e-05, + "loss": 0.1949, + "step": 18293, + "teacher_loss": 0.15432533621788025 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.21180686354637146, + "learning_rate": 1.2730683576165767e-05, + "loss": 0.1712, + "step": 18294, + "teacher_loss": 0.16667698323726654 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.585758626461029, + "learning_rate": 1.2728438495060113e-05, + "loss": 0.259, + "step": 18295, + "teacher_loss": 0.2226904183626175 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.29648149013519287, + "learning_rate": 1.2726193466034566e-05, + "loss": 0.1632, + "step": 18296, + "teacher_loss": 0.14843764901161194 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.3029516339302063, + "learning_rate": 1.272394848914058e-05, + "loss": 0.229, + "step": 18297, + "teacher_loss": 0.22077150642871857 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.46519458293914795, + "learning_rate": 1.2721703564429627e-05, + "loss": 0.4411, + "step": 18298, + "teacher_loss": 0.4384276568889618 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.33070623874664307, + "learning_rate": 1.271945869195319e-05, + "loss": 0.2246, + "step": 18299, + "teacher_loss": 0.21280014514923096 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.3734726309776306, + "learning_rate": 1.2717213871762723e-05, + "loss": 0.2762, + "step": 18300, + "teacher_loss": 0.2653909921646118 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.6344031691551208, + "learning_rate": 1.2714969103909695e-05, + "loss": 0.3295, + "step": 18301, + "teacher_loss": 0.2956312894821167 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.2561222314834595, + "learning_rate": 1.271272438844558e-05, + "loss": 0.2091, + "step": 18302, + "teacher_loss": 0.20388305187225342 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.3513815999031067, + "learning_rate": 1.2710479725421834e-05, + "loss": 0.1924, + "step": 18303, + "teacher_loss": 0.17474044859409332 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.18251383304595947, + "learning_rate": 1.2708235114889925e-05, + "loss": 0.1789, + "step": 18304, + "teacher_loss": 0.17854559421539307 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.779865026473999, + "learning_rate": 1.2705990556901311e-05, + "loss": 0.3357, + "step": 18305, + "teacher_loss": 0.2863811254501343 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.5663478970527649, + "learning_rate": 1.270374605150746e-05, + "loss": 0.345, + "step": 18306, + "teacher_loss": 0.3204531967639923 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.3370470702648163, + "learning_rate": 1.2701501598759828e-05, + "loss": 0.1923, + "step": 18307, + "teacher_loss": 0.1762060970067978 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.47871798276901245, + "learning_rate": 1.2699257198709863e-05, + "loss": 0.2636, + "step": 18308, + "teacher_loss": 0.2396763563156128 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.5074809193611145, + "learning_rate": 1.2697012851409045e-05, + "loss": 0.2455, + "step": 18309, + "teacher_loss": 0.21635743975639343 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.3551393747329712, + "learning_rate": 1.2694768556908815e-05, + "loss": 0.3121, + "step": 18310, + "teacher_loss": 0.3073699474334717 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.36625397205352783, + "learning_rate": 1.2692524315260625e-05, + "loss": 0.2513, + "step": 18311, + "teacher_loss": 0.23851361870765686 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.5638904571533203, + "learning_rate": 1.2690280126515937e-05, + "loss": 0.2424, + "step": 18312, + "teacher_loss": 0.20671653747558594 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.4471657872200012, + "learning_rate": 1.2688035990726202e-05, + "loss": 0.2748, + "step": 18313, + "teacher_loss": 0.25560474395751953 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.2671241760253906, + "learning_rate": 1.2685791907942866e-05, + "loss": 0.1859, + "step": 18314, + "teacher_loss": 0.17683741450309753 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.6140532493591309, + "learning_rate": 1.2683547878217388e-05, + "loss": 0.2867, + "step": 18315, + "teacher_loss": 0.2503645122051239 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.7801412343978882, + "learning_rate": 1.268130390160121e-05, + "loss": 0.3084, + "step": 18316, + "teacher_loss": 0.2559375762939453 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.5742823481559753, + "learning_rate": 1.267905997814578e-05, + "loss": 0.2575, + "step": 18317, + "teacher_loss": 0.2222837507724762 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.4269393980503082, + "learning_rate": 1.267681610790255e-05, + "loss": 0.2846, + "step": 18318, + "teacher_loss": 0.2688036262989044 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.8354735970497131, + "learning_rate": 1.2674572290922963e-05, + "loss": 0.6003, + "step": 18319, + "teacher_loss": 0.5741674900054932 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.48213791847229004, + "learning_rate": 1.2672328527258452e-05, + "loss": 0.2572, + "step": 18320, + "teacher_loss": 0.2322022020816803 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.5484969615936279, + "learning_rate": 1.267008481696048e-05, + "loss": 0.3822, + "step": 18321, + "teacher_loss": 0.3637485206127167 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.4675028324127197, + "learning_rate": 1.2667841160080474e-05, + "loss": 0.391, + "step": 18322, + "teacher_loss": 0.38248568773269653 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.511128306388855, + "learning_rate": 1.2665597556669876e-05, + "loss": 0.1702, + "step": 18323, + "teacher_loss": 0.13231760263442993 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.5984614491462708, + "learning_rate": 1.2663354006780127e-05, + "loss": 0.2211, + "step": 18324, + "teacher_loss": 0.17914672195911407 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.4673621654510498, + "learning_rate": 1.2661110510462666e-05, + "loss": 0.22, + "step": 18325, + "teacher_loss": 0.19250322878360748 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.40711402893066406, + "learning_rate": 1.2658867067768929e-05, + "loss": 0.2386, + "step": 18326, + "teacher_loss": 0.21986910700798035 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.33419954776763916, + "learning_rate": 1.265662367875035e-05, + "loss": 0.2598, + "step": 18327, + "teacher_loss": 0.25149330496788025 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.1730618178844452, + "learning_rate": 1.2654380343458366e-05, + "loss": 0.2012, + "step": 18328, + "teacher_loss": 0.20434805750846863 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.633500337600708, + "learning_rate": 1.2652137061944407e-05, + "loss": 0.3769, + "step": 18329, + "teacher_loss": 0.34837621450424194 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.37324994802474976, + "learning_rate": 1.2649893834259904e-05, + "loss": 0.1901, + "step": 18330, + "teacher_loss": 0.16978952288627625 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.4630226492881775, + "learning_rate": 1.2647650660456293e-05, + "loss": 0.289, + "step": 18331, + "teacher_loss": 0.26962825655937195 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.7017282247543335, + "learning_rate": 1.2645407540585e-05, + "loss": 0.2923, + "step": 18332, + "teacher_loss": 0.24678532779216766 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.41106170415878296, + "learning_rate": 1.2643164474697444e-05, + "loss": 0.168, + "step": 18333, + "teacher_loss": 0.14096488058567047 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.24452099204063416, + "learning_rate": 1.2640921462845073e-05, + "loss": 0.1818, + "step": 18334, + "teacher_loss": 0.17480576038360596 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.5192121267318726, + "learning_rate": 1.2638678505079295e-05, + "loss": 0.3551, + "step": 18335, + "teacher_loss": 0.3368752598762512 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.2798868715763092, + "learning_rate": 1.2636435601451537e-05, + "loss": 0.2378, + "step": 18336, + "teacher_loss": 0.23314549028873444 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.7776020765304565, + "learning_rate": 1.2634192752013225e-05, + "loss": 0.2738, + "step": 18337, + "teacher_loss": 0.21782538294792175 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.2635144591331482, + "learning_rate": 1.2631949956815782e-05, + "loss": 0.2119, + "step": 18338, + "teacher_loss": 0.2061435580253601 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.37651556730270386, + "learning_rate": 1.2629707215910624e-05, + "loss": 0.1881, + "step": 18339, + "teacher_loss": 0.1671895980834961 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.5983116626739502, + "learning_rate": 1.2627464529349175e-05, + "loss": 0.2594, + "step": 18340, + "teacher_loss": 0.22173020243644714 + }, + { + "compression_loss": 0.0, + "epoch": 3.31, + "label_loss": 0.4211875796318054, + "learning_rate": 1.262522189718285e-05, + "loss": 0.4726, + "step": 18341, + "teacher_loss": 0.47836044430732727 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.496562659740448, + "learning_rate": 1.2622979319463065e-05, + "loss": 0.2551, + "step": 18342, + "teacher_loss": 0.22828659415245056 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.502152144908905, + "learning_rate": 1.2620736796241243e-05, + "loss": 0.2828, + "step": 18343, + "teacher_loss": 0.2584381103515625 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.3125140070915222, + "learning_rate": 1.2618494327568794e-05, + "loss": 0.2428, + "step": 18344, + "teacher_loss": 0.23502734303474426 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.7602489590644836, + "learning_rate": 1.2616251913497126e-05, + "loss": 0.3092, + "step": 18345, + "teacher_loss": 0.2591143250465393 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.3397023379802704, + "learning_rate": 1.2614009554077652e-05, + "loss": 0.2134, + "step": 18346, + "teacher_loss": 0.1993771195411682 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.43602254986763, + "learning_rate": 1.261176724936179e-05, + "loss": 0.2482, + "step": 18347, + "teacher_loss": 0.2273297905921936 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.4830107092857361, + "learning_rate": 1.2609524999400943e-05, + "loss": 0.1912, + "step": 18348, + "teacher_loss": 0.15874908864498138 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.7457711696624756, + "learning_rate": 1.2607282804246519e-05, + "loss": 0.2458, + "step": 18349, + "teacher_loss": 0.19030043482780457 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.623133659362793, + "learning_rate": 1.2605040663949928e-05, + "loss": 0.3886, + "step": 18350, + "teacher_loss": 0.3625381290912628 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.13423186540603638, + "learning_rate": 1.2602798578562575e-05, + "loss": 0.199, + "step": 18351, + "teacher_loss": 0.20618563890457153 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.3188090920448303, + "learning_rate": 1.2600556548135861e-05, + "loss": 0.2279, + "step": 18352, + "teacher_loss": 0.21781525015830994 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.6210907101631165, + "learning_rate": 1.2598314572721193e-05, + "loss": 0.3078, + "step": 18353, + "teacher_loss": 0.2730112075805664 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.19715633988380432, + "learning_rate": 1.2596072652369973e-05, + "loss": 0.1611, + "step": 18354, + "teacher_loss": 0.1571391522884369 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.6174234747886658, + "learning_rate": 1.2593830787133594e-05, + "loss": 0.244, + "step": 18355, + "teacher_loss": 0.20252615213394165 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.5951739549636841, + "learning_rate": 1.2591588977063466e-05, + "loss": 0.4373, + "step": 18356, + "teacher_loss": 0.4197431802749634 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.15973827242851257, + "learning_rate": 1.2589347222210986e-05, + "loss": 0.1897, + "step": 18357, + "teacher_loss": 0.19298920035362244 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.2966060936450958, + "learning_rate": 1.2587105522627535e-05, + "loss": 0.2353, + "step": 18358, + "teacher_loss": 0.2285279929637909 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.5642554759979248, + "learning_rate": 1.2584863878364533e-05, + "loss": 0.2607, + "step": 18359, + "teacher_loss": 0.22693070769309998 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.40447473526000977, + "learning_rate": 1.2582622289473355e-05, + "loss": 0.2848, + "step": 18360, + "teacher_loss": 0.2715034484863281 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.4652654230594635, + "learning_rate": 1.2580380756005399e-05, + "loss": 0.2666, + "step": 18361, + "teacher_loss": 0.24455666542053223 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.9684268236160278, + "learning_rate": 1.2578139278012059e-05, + "loss": 0.2885, + "step": 18362, + "teacher_loss": 0.21289989352226257 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.2056064009666443, + "learning_rate": 1.2575897855544725e-05, + "loss": 0.1709, + "step": 18363, + "teacher_loss": 0.16708341240882874 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.2496483027935028, + "learning_rate": 1.2573656488654785e-05, + "loss": 0.1559, + "step": 18364, + "teacher_loss": 0.1455153524875641 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.18078044056892395, + "learning_rate": 1.2571415177393626e-05, + "loss": 0.1819, + "step": 18365, + "teacher_loss": 0.18197625875473022 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.4521269202232361, + "learning_rate": 1.2569173921812637e-05, + "loss": 0.2203, + "step": 18366, + "teacher_loss": 0.19458147883415222 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.22433842718601227, + "learning_rate": 1.2566932721963206e-05, + "loss": 0.1908, + "step": 18367, + "teacher_loss": 0.18707457184791565 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.6782110929489136, + "learning_rate": 1.2564691577896703e-05, + "loss": 0.2328, + "step": 18368, + "teacher_loss": 0.18335847556591034 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.321159303188324, + "learning_rate": 1.2562450489664531e-05, + "loss": 0.2385, + "step": 18369, + "teacher_loss": 0.2292879968881607 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.34564781188964844, + "learning_rate": 1.2560209457318055e-05, + "loss": 0.2855, + "step": 18370, + "teacher_loss": 0.2788253426551819 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.3948368728160858, + "learning_rate": 1.2557968480908658e-05, + "loss": 0.2728, + "step": 18371, + "teacher_loss": 0.2592817544937134 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.6856731176376343, + "learning_rate": 1.2555727560487732e-05, + "loss": 0.2681, + "step": 18372, + "teacher_loss": 0.22172386944293976 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.47335517406463623, + "learning_rate": 1.2553486696106639e-05, + "loss": 0.3098, + "step": 18373, + "teacher_loss": 0.2916069030761719 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.549426257610321, + "learning_rate": 1.255124588781676e-05, + "loss": 0.231, + "step": 18374, + "teacher_loss": 0.19566486775875092 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.2438855767250061, + "learning_rate": 1.2549005135669474e-05, + "loss": 0.2144, + "step": 18375, + "teacher_loss": 0.21109366416931152 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.310373991727829, + "learning_rate": 1.254676443971615e-05, + "loss": 0.2006, + "step": 18376, + "teacher_loss": 0.18842291831970215 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.2674780786037445, + "learning_rate": 1.2544523800008161e-05, + "loss": 0.2, + "step": 18377, + "teacher_loss": 0.19246046245098114 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.48570287227630615, + "learning_rate": 1.254228321659688e-05, + "loss": 0.3622, + "step": 18378, + "teacher_loss": 0.3484587073326111 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.472123384475708, + "learning_rate": 1.254004268953368e-05, + "loss": 0.2141, + "step": 18379, + "teacher_loss": 0.18545100092887878 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.3307039141654968, + "learning_rate": 1.253780221886992e-05, + "loss": 0.2472, + "step": 18380, + "teacher_loss": 0.2378869652748108 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.2865726351737976, + "learning_rate": 1.2535561804656977e-05, + "loss": 0.1866, + "step": 18381, + "teacher_loss": 0.175489604473114 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.3651304841041565, + "learning_rate": 1.2533321446946216e-05, + "loss": 0.1919, + "step": 18382, + "teacher_loss": 0.1726188063621521 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.6302614212036133, + "learning_rate": 1.2531081145788989e-05, + "loss": 0.3075, + "step": 18383, + "teacher_loss": 0.27162978053092957 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.3901684880256653, + "learning_rate": 1.2528840901236678e-05, + "loss": 0.2248, + "step": 18384, + "teacher_loss": 0.20642103254795074 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.5575323104858398, + "learning_rate": 1.2526600713340636e-05, + "loss": 0.3089, + "step": 18385, + "teacher_loss": 0.2813228964805603 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.2238282561302185, + "learning_rate": 1.2524360582152221e-05, + "loss": 0.1644, + "step": 18386, + "teacher_loss": 0.157810240983963 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.3236406743526459, + "learning_rate": 1.2522120507722793e-05, + "loss": 0.2446, + "step": 18387, + "teacher_loss": 0.2358473539352417 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.17705470323562622, + "learning_rate": 1.2519880490103718e-05, + "loss": 0.1924, + "step": 18388, + "teacher_loss": 0.1941005289554596 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.4171791970729828, + "learning_rate": 1.2517640529346345e-05, + "loss": 0.3579, + "step": 18389, + "teacher_loss": 0.3512924909591675 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.3510870933532715, + "learning_rate": 1.2515400625502031e-05, + "loss": 0.237, + "step": 18390, + "teacher_loss": 0.224280446767807 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.3695766031742096, + "learning_rate": 1.2513160778622133e-05, + "loss": 0.1601, + "step": 18391, + "teacher_loss": 0.13681301474571228 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.5040054321289062, + "learning_rate": 1.2510920988758006e-05, + "loss": 0.1985, + "step": 18392, + "teacher_loss": 0.16455751657485962 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.3480949401855469, + "learning_rate": 1.2508681255960987e-05, + "loss": 0.2216, + "step": 18393, + "teacher_loss": 0.20751655101776123 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.20031729340553284, + "learning_rate": 1.2506441580282448e-05, + "loss": 0.1818, + "step": 18394, + "teacher_loss": 0.17977438867092133 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.4729866683483124, + "learning_rate": 1.2504201961773727e-05, + "loss": 0.2311, + "step": 18395, + "teacher_loss": 0.20421051979064941 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.33365535736083984, + "learning_rate": 1.2501962400486164e-05, + "loss": 0.2196, + "step": 18396, + "teacher_loss": 0.20690679550170898 + }, + { + "compression_loss": 0.0, + "epoch": 3.32, + "label_loss": 0.9292529821395874, + "learning_rate": 1.2499722896471121e-05, + "loss": 0.2346, + "step": 18397, + "teacher_loss": 0.15746843814849854 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.6349519491195679, + "learning_rate": 1.2497483449779935e-05, + "loss": 0.2953, + "step": 18398, + "teacher_loss": 0.25754842162132263 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.15506920218467712, + "learning_rate": 1.2495244060463947e-05, + "loss": 0.1953, + "step": 18399, + "teacher_loss": 0.1997290849685669 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.7448081374168396, + "learning_rate": 1.2493004728574505e-05, + "loss": 0.4059, + "step": 18400, + "teacher_loss": 0.36821648478507996 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.6886869668960571, + "learning_rate": 1.2490765454162949e-05, + "loss": 0.6381, + "step": 18401, + "teacher_loss": 0.6324926614761353 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.4537810683250427, + "learning_rate": 1.2488526237280615e-05, + "loss": 0.6914, + "step": 18402, + "teacher_loss": 0.7177980542182922 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.8059777021408081, + "learning_rate": 1.248628707797885e-05, + "loss": 0.3301, + "step": 18403, + "teacher_loss": 0.2772555351257324 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.4921862483024597, + "learning_rate": 1.2484047976308984e-05, + "loss": 0.2496, + "step": 18404, + "teacher_loss": 0.22259561717510223 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.31399106979370117, + "learning_rate": 1.248180893232235e-05, + "loss": 0.1766, + "step": 18405, + "teacher_loss": 0.16130468249320984 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.4702402353286743, + "learning_rate": 1.2479569946070293e-05, + "loss": 0.243, + "step": 18406, + "teacher_loss": 0.21779607236385345 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.5913668274879456, + "learning_rate": 1.2477331017604143e-05, + "loss": 0.3171, + "step": 18407, + "teacher_loss": 0.2866814136505127 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.370329350233078, + "learning_rate": 1.2475092146975224e-05, + "loss": 0.1781, + "step": 18408, + "teacher_loss": 0.15668591856956482 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.331412672996521, + "learning_rate": 1.247285333423487e-05, + "loss": 0.2057, + "step": 18409, + "teacher_loss": 0.1917145550251007 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.6745486259460449, + "learning_rate": 1.2470614579434416e-05, + "loss": 0.1956, + "step": 18410, + "teacher_loss": 0.1423964500427246 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.8741778135299683, + "learning_rate": 1.2468375882625186e-05, + "loss": 0.3204, + "step": 18411, + "teacher_loss": 0.2589215040206909 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.4578501880168915, + "learning_rate": 1.2466137243858504e-05, + "loss": 0.2694, + "step": 18412, + "teacher_loss": 0.24844232201576233 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.3624303936958313, + "learning_rate": 1.24638986631857e-05, + "loss": 0.2023, + "step": 18413, + "teacher_loss": 0.18451830744743347 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.45224764943122864, + "learning_rate": 1.2461660140658098e-05, + "loss": 0.1912, + "step": 18414, + "teacher_loss": 0.16218912601470947 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.39102041721343994, + "learning_rate": 1.2459421676327013e-05, + "loss": 0.2248, + "step": 18415, + "teacher_loss": 0.20634929835796356 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.3308330476284027, + "learning_rate": 1.2457183270243778e-05, + "loss": 0.1937, + "step": 18416, + "teacher_loss": 0.17841240763664246 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.6286229491233826, + "learning_rate": 1.2454944922459709e-05, + "loss": 0.2835, + "step": 18417, + "teacher_loss": 0.2451486438512802 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.154030442237854, + "learning_rate": 1.245270663302611e-05, + "loss": 0.1292, + "step": 18418, + "teacher_loss": 0.12639518082141876 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.5325549840927124, + "learning_rate": 1.2450468401994325e-05, + "loss": 0.2257, + "step": 18419, + "teacher_loss": 0.19166049361228943 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.26981422305107117, + "learning_rate": 1.2448230229415651e-05, + "loss": 0.2088, + "step": 18420, + "teacher_loss": 0.20201066136360168 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.3914691209793091, + "learning_rate": 1.2445992115341399e-05, + "loss": 0.199, + "step": 18421, + "teacher_loss": 0.1775984764099121 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.5279285311698914, + "learning_rate": 1.2443754059822901e-05, + "loss": 0.3394, + "step": 18422, + "teacher_loss": 0.31845352053642273 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.5221810340881348, + "learning_rate": 1.2441516062911454e-05, + "loss": 0.221, + "step": 18423, + "teacher_loss": 0.1874893307685852 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.6687108278274536, + "learning_rate": 1.2439278124658374e-05, + "loss": 0.2394, + "step": 18424, + "teacher_loss": 0.19169974327087402 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.8883898854255676, + "learning_rate": 1.2437040245114966e-05, + "loss": 0.2697, + "step": 18425, + "teacher_loss": 0.2009199857711792 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.33791857957839966, + "learning_rate": 1.2434802424332546e-05, + "loss": 0.1567, + "step": 18426, + "teacher_loss": 0.13655614852905273 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.5126756429672241, + "learning_rate": 1.2432564662362414e-05, + "loss": 0.2187, + "step": 18427, + "teacher_loss": 0.18605603277683258 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.17766505479812622, + "learning_rate": 1.2430326959255873e-05, + "loss": 0.1808, + "step": 18428, + "teacher_loss": 0.1811705231666565 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.42550814151763916, + "learning_rate": 1.2428089315064236e-05, + "loss": 0.263, + "step": 18429, + "teacher_loss": 0.2448895275592804 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.2661648988723755, + "learning_rate": 1.24258517298388e-05, + "loss": 0.1509, + "step": 18430, + "teacher_loss": 0.13806448876857758 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.6711167097091675, + "learning_rate": 1.2423614203630858e-05, + "loss": 0.2358, + "step": 18431, + "teacher_loss": 0.1874634474515915 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.26167038083076477, + "learning_rate": 1.2421376736491728e-05, + "loss": 0.2641, + "step": 18432, + "teacher_loss": 0.2644244134426117 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.1798754781484604, + "learning_rate": 1.2419139328472693e-05, + "loss": 0.1694, + "step": 18433, + "teacher_loss": 0.16824504733085632 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.652587890625, + "learning_rate": 1.2416901979625055e-05, + "loss": 0.2305, + "step": 18434, + "teacher_loss": 0.1835586428642273 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.23921085894107819, + "learning_rate": 1.2414664690000113e-05, + "loss": 0.1952, + "step": 18435, + "teacher_loss": 0.1903022974729538 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.5190736055374146, + "learning_rate": 1.2412427459649156e-05, + "loss": 0.1926, + "step": 18436, + "teacher_loss": 0.15629518032073975 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.3245939016342163, + "learning_rate": 1.241019028862348e-05, + "loss": 0.2996, + "step": 18437, + "teacher_loss": 0.2968396842479706 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.368893563747406, + "learning_rate": 1.2407953176974377e-05, + "loss": 0.232, + "step": 18438, + "teacher_loss": 0.21683043241500854 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.14916355907917023, + "learning_rate": 1.2405716124753136e-05, + "loss": 0.1683, + "step": 18439, + "teacher_loss": 0.1704126000404358 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.5537216663360596, + "learning_rate": 1.2403479132011044e-05, + "loss": 0.2416, + "step": 18440, + "teacher_loss": 0.2069188505411148 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.22304297983646393, + "learning_rate": 1.240124219879939e-05, + "loss": 0.1668, + "step": 18441, + "teacher_loss": 0.16059334576129913 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.30141621828079224, + "learning_rate": 1.2399005325169468e-05, + "loss": 0.3025, + "step": 18442, + "teacher_loss": 0.3026027977466583 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.4325682520866394, + "learning_rate": 1.2396768511172546e-05, + "loss": 0.1961, + "step": 18443, + "teacher_loss": 0.16981473565101624 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.45541608333587646, + "learning_rate": 1.2394531756859927e-05, + "loss": 0.3309, + "step": 18444, + "teacher_loss": 0.317097008228302 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.20820805430412292, + "learning_rate": 1.2392295062282878e-05, + "loss": 0.2086, + "step": 18445, + "teacher_loss": 0.2086043357849121 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.2874957025051117, + "learning_rate": 1.239005842749268e-05, + "loss": 0.1846, + "step": 18446, + "teacher_loss": 0.17313873767852783 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.7704253196716309, + "learning_rate": 1.2387821852540628e-05, + "loss": 0.3349, + "step": 18447, + "teacher_loss": 0.28652411699295044 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.2260725498199463, + "learning_rate": 1.2385585337477983e-05, + "loss": 0.1645, + "step": 18448, + "teacher_loss": 0.15767429769039154 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.33926627039909363, + "learning_rate": 1.238334888235603e-05, + "loss": 0.1951, + "step": 18449, + "teacher_loss": 0.1791081577539444 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.4083828330039978, + "learning_rate": 1.2381112487226038e-05, + "loss": 0.2822, + "step": 18450, + "teacher_loss": 0.2681264877319336 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.6318469047546387, + "learning_rate": 1.2378876152139288e-05, + "loss": 0.2726, + "step": 18451, + "teacher_loss": 0.23272892832756042 + }, + { + "compression_loss": 0.0, + "epoch": 3.33, + "label_loss": 0.7110015749931335, + "learning_rate": 1.237663987714705e-05, + "loss": 0.2185, + "step": 18452, + "teacher_loss": 0.16375377774238586 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.39453110098838806, + "learning_rate": 1.2374403662300589e-05, + "loss": 0.2002, + "step": 18453, + "teacher_loss": 0.1786387413740158 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.8089221715927124, + "learning_rate": 1.2372167507651187e-05, + "loss": 0.3536, + "step": 18454, + "teacher_loss": 0.30305707454681396 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.39054885506629944, + "learning_rate": 1.2369931413250104e-05, + "loss": 0.1923, + "step": 18455, + "teacher_loss": 0.1702529788017273 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.10799497365951538, + "learning_rate": 1.2367695379148601e-05, + "loss": 0.1792, + "step": 18456, + "teacher_loss": 0.18709462881088257 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.17388957738876343, + "learning_rate": 1.2365459405397963e-05, + "loss": 0.2188, + "step": 18457, + "teacher_loss": 0.22379735112190247 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.5735845565795898, + "learning_rate": 1.2363223492049435e-05, + "loss": 0.2928, + "step": 18458, + "teacher_loss": 0.26162970066070557 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.5318232774734497, + "learning_rate": 1.2360987639154286e-05, + "loss": 0.2859, + "step": 18459, + "teacher_loss": 0.2586110234260559 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.2984362840652466, + "learning_rate": 1.235875184676378e-05, + "loss": 0.2299, + "step": 18460, + "teacher_loss": 0.22227036952972412 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.26036423444747925, + "learning_rate": 1.2356516114929176e-05, + "loss": 0.262, + "step": 18461, + "teacher_loss": 0.26213395595550537 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.4930025339126587, + "learning_rate": 1.235428044370173e-05, + "loss": 0.3238, + "step": 18462, + "teacher_loss": 0.3049967288970947 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.3786955177783966, + "learning_rate": 1.2352044833132701e-05, + "loss": 0.2032, + "step": 18463, + "teacher_loss": 0.1837441325187683 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.26752012968063354, + "learning_rate": 1.2349809283273348e-05, + "loss": 0.1905, + "step": 18464, + "teacher_loss": 0.18198224902153015 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.9993331432342529, + "learning_rate": 1.2347573794174918e-05, + "loss": 0.2399, + "step": 18465, + "teacher_loss": 0.15550720691680908 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.1885949969291687, + "learning_rate": 1.2345338365888673e-05, + "loss": 0.1855, + "step": 18466, + "teacher_loss": 0.18512190878391266 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.3058115243911743, + "learning_rate": 1.2343102998465863e-05, + "loss": 0.1693, + "step": 18467, + "teacher_loss": 0.15412163734436035 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.4665020704269409, + "learning_rate": 1.234086769195773e-05, + "loss": 0.3233, + "step": 18468, + "teacher_loss": 0.3074265420436859 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.4196681082248688, + "learning_rate": 1.2338632446415526e-05, + "loss": 0.2735, + "step": 18469, + "teacher_loss": 0.2572307586669922 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.3531424403190613, + "learning_rate": 1.2336397261890506e-05, + "loss": 0.1973, + "step": 18470, + "teacher_loss": 0.17996114492416382 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.3543007969856262, + "learning_rate": 1.233416213843391e-05, + "loss": 0.2157, + "step": 18471, + "teacher_loss": 0.20034965872764587 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.5449449419975281, + "learning_rate": 1.233192707609698e-05, + "loss": 0.2124, + "step": 18472, + "teacher_loss": 0.17540481686592102 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.906792163848877, + "learning_rate": 1.2329692074930967e-05, + "loss": 0.3783, + "step": 18473, + "teacher_loss": 0.31960436701774597 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.38990023732185364, + "learning_rate": 1.2327457134987107e-05, + "loss": 0.2613, + "step": 18474, + "teacher_loss": 0.24697056412696838 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.5535478591918945, + "learning_rate": 1.2325222256316639e-05, + "loss": 0.2907, + "step": 18475, + "teacher_loss": 0.26148879528045654 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.5571458339691162, + "learning_rate": 1.232298743897081e-05, + "loss": 0.205, + "step": 18476, + "teacher_loss": 0.16584277153015137 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.19842708110809326, + "learning_rate": 1.2320752683000852e-05, + "loss": 0.1387, + "step": 18477, + "teacher_loss": 0.13203155994415283 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.417305052280426, + "learning_rate": 1.2318517988457999e-05, + "loss": 0.2651, + "step": 18478, + "teacher_loss": 0.24821697175502777 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.37035760283470154, + "learning_rate": 1.2316283355393493e-05, + "loss": 0.2122, + "step": 18479, + "teacher_loss": 0.19458505511283875 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.40091216564178467, + "learning_rate": 1.2314048783858566e-05, + "loss": 0.2877, + "step": 18480, + "teacher_loss": 0.27507850527763367 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.5844032764434814, + "learning_rate": 1.2311814273904437e-05, + "loss": 0.2451, + "step": 18481, + "teacher_loss": 0.2074226438999176 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.5006624460220337, + "learning_rate": 1.2309579825582357e-05, + "loss": 0.2057, + "step": 18482, + "teacher_loss": 0.17291507124900818 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.33015263080596924, + "learning_rate": 1.2307345438943544e-05, + "loss": 0.2545, + "step": 18483, + "teacher_loss": 0.24613966047763824 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.5301461219787598, + "learning_rate": 1.2305111114039224e-05, + "loss": 0.2733, + "step": 18484, + "teacher_loss": 0.24474181234836578 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.38936296105384827, + "learning_rate": 1.2302876850920627e-05, + "loss": 0.221, + "step": 18485, + "teacher_loss": 0.20230278372764587 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 1.1155613660812378, + "learning_rate": 1.2300642649638978e-05, + "loss": 0.4622, + "step": 18486, + "teacher_loss": 0.38959842920303345 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.5656943321228027, + "learning_rate": 1.22984085102455e-05, + "loss": 0.3216, + "step": 18487, + "teacher_loss": 0.2945324778556824 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.5215837955474854, + "learning_rate": 1.2296174432791415e-05, + "loss": 0.2767, + "step": 18488, + "teacher_loss": 0.24953246116638184 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 1.0448509454727173, + "learning_rate": 1.2293940417327945e-05, + "loss": 0.4161, + "step": 18489, + "teacher_loss": 0.3462563455104828 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.9179197549819946, + "learning_rate": 1.229170646390631e-05, + "loss": 0.4136, + "step": 18490, + "teacher_loss": 0.3575159013271332 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.2976725995540619, + "learning_rate": 1.2289472572577716e-05, + "loss": 0.2105, + "step": 18491, + "teacher_loss": 0.20082589983940125 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.3082728981971741, + "learning_rate": 1.2287238743393401e-05, + "loss": 0.2324, + "step": 18492, + "teacher_loss": 0.22392529249191284 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.4041733741760254, + "learning_rate": 1.2285004976404564e-05, + "loss": 0.2867, + "step": 18493, + "teacher_loss": 0.273611843585968 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.3582770824432373, + "learning_rate": 1.2282771271662417e-05, + "loss": 0.26, + "step": 18494, + "teacher_loss": 0.2491290271282196 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.4711555242538452, + "learning_rate": 1.2280537629218186e-05, + "loss": 0.2387, + "step": 18495, + "teacher_loss": 0.21284595131874084 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.33075985312461853, + "learning_rate": 1.2278304049123073e-05, + "loss": 0.1746, + "step": 18496, + "teacher_loss": 0.15727092325687408 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.22954338788986206, + "learning_rate": 1.2276070531428284e-05, + "loss": 0.3185, + "step": 18497, + "teacher_loss": 0.32840949296951294 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.3590051531791687, + "learning_rate": 1.2273837076185034e-05, + "loss": 0.1554, + "step": 18498, + "teacher_loss": 0.13277405500411987 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.698039174079895, + "learning_rate": 1.2271603683444525e-05, + "loss": 0.2686, + "step": 18499, + "teacher_loss": 0.22086824476718903 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.24242046475410461, + "learning_rate": 1.2269370353257963e-05, + "loss": 0.1952, + "step": 18500, + "teacher_loss": 0.189897820353508 + }, + { + "epoch": 3.34, + "eval_exact_match": 79.81078524124882, + "eval_f1": 87.28057577192935, + "step": 18500 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.5803496837615967, + "learning_rate": 1.2267137085676552e-05, + "loss": 0.2751, + "step": 18501, + "teacher_loss": 0.2411813735961914 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.498191773891449, + "learning_rate": 1.2264903880751496e-05, + "loss": 0.2751, + "step": 18502, + "teacher_loss": 0.250287801027298 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.20128510892391205, + "learning_rate": 1.226267073853399e-05, + "loss": 0.1947, + "step": 18503, + "teacher_loss": 0.19400885701179504 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.40472954511642456, + "learning_rate": 1.2260437659075242e-05, + "loss": 0.2185, + "step": 18504, + "teacher_loss": 0.19781237840652466 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.12580493092536926, + "learning_rate": 1.2258204642426445e-05, + "loss": 0.2005, + "step": 18505, + "teacher_loss": 0.20878593623638153 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.7386248707771301, + "learning_rate": 1.2255971688638787e-05, + "loss": 0.3389, + "step": 18506, + "teacher_loss": 0.2945179343223572 + }, + { + "compression_loss": 0.0, + "epoch": 3.34, + "label_loss": 0.2921562194824219, + "learning_rate": 1.2253738797763482e-05, + "loss": 0.2104, + "step": 18507, + "teacher_loss": 0.2013486623764038 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.1099204272031784, + "learning_rate": 1.2251505969851708e-05, + "loss": 0.1199, + "step": 18508, + "teacher_loss": 0.1210232824087143 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.3114200234413147, + "learning_rate": 1.2249273204954659e-05, + "loss": 0.28, + "step": 18509, + "teacher_loss": 0.2764671742916107 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.17083880305290222, + "learning_rate": 1.2247040503123533e-05, + "loss": 0.2225, + "step": 18510, + "teacher_loss": 0.22820451855659485 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.6430106163024902, + "learning_rate": 1.224480786440951e-05, + "loss": 0.2524, + "step": 18511, + "teacher_loss": 0.20896175503730774 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.2527446746826172, + "learning_rate": 1.2242575288863785e-05, + "loss": 0.2094, + "step": 18512, + "teacher_loss": 0.2046075463294983 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.3001673221588135, + "learning_rate": 1.2240342776537538e-05, + "loss": 0.2029, + "step": 18513, + "teacher_loss": 0.1921398639678955 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.27059808373451233, + "learning_rate": 1.2238110327481961e-05, + "loss": 0.1613, + "step": 18514, + "teacher_loss": 0.14914755523204803 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.39105039834976196, + "learning_rate": 1.2235877941748237e-05, + "loss": 0.1626, + "step": 18515, + "teacher_loss": 0.1371745765209198 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.7569479942321777, + "learning_rate": 1.2233645619387536e-05, + "loss": 0.5371, + "step": 18516, + "teacher_loss": 0.5126884579658508 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.2586199641227722, + "learning_rate": 1.2231413360451054e-05, + "loss": 0.1986, + "step": 18517, + "teacher_loss": 0.1919756382703781 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.4103398323059082, + "learning_rate": 1.2229181164989963e-05, + "loss": 0.1841, + "step": 18518, + "teacher_loss": 0.15899735689163208 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.9105491638183594, + "learning_rate": 1.2226949033055429e-05, + "loss": 0.3037, + "step": 18519, + "teacher_loss": 0.23623651266098022 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.2622298002243042, + "learning_rate": 1.2224716964698653e-05, + "loss": 0.1845, + "step": 18520, + "teacher_loss": 0.17591683566570282 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.334195077419281, + "learning_rate": 1.2222484959970793e-05, + "loss": 0.2239, + "step": 18521, + "teacher_loss": 0.21163348853588104 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.48472732305526733, + "learning_rate": 1.222025301892302e-05, + "loss": 0.1968, + "step": 18522, + "teacher_loss": 0.1648542284965515 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.47648879885673523, + "learning_rate": 1.2218021141606517e-05, + "loss": 0.2455, + "step": 18523, + "teacher_loss": 0.2198607623577118 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.7430230379104614, + "learning_rate": 1.2215789328072448e-05, + "loss": 0.2808, + "step": 18524, + "teacher_loss": 0.22943472862243652 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.5140787363052368, + "learning_rate": 1.2213557578371977e-05, + "loss": 0.2533, + "step": 18525, + "teacher_loss": 0.22428598999977112 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.27317333221435547, + "learning_rate": 1.2211325892556282e-05, + "loss": 0.2559, + "step": 18526, + "teacher_loss": 0.2539953887462616 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.5334306955337524, + "learning_rate": 1.2209094270676522e-05, + "loss": 0.2712, + "step": 18527, + "teacher_loss": 0.24201242625713348 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.3821364641189575, + "learning_rate": 1.2206862712783859e-05, + "loss": 0.2809, + "step": 18528, + "teacher_loss": 0.2696574330329895 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.4276137351989746, + "learning_rate": 1.2204631218929463e-05, + "loss": 0.2193, + "step": 18529, + "teacher_loss": 0.19615933299064636 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.3350640535354614, + "learning_rate": 1.2202399789164499e-05, + "loss": 0.1959, + "step": 18530, + "teacher_loss": 0.18041522800922394 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.548102617263794, + "learning_rate": 1.2200168423540114e-05, + "loss": 0.1884, + "step": 18531, + "teacher_loss": 0.1483854353427887 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.23635722696781158, + "learning_rate": 1.219793712210747e-05, + "loss": 0.2002, + "step": 18532, + "teacher_loss": 0.19615203142166138 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.24337252974510193, + "learning_rate": 1.219570588491773e-05, + "loss": 0.1771, + "step": 18533, + "teacher_loss": 0.16971978545188904 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.38437357544898987, + "learning_rate": 1.2193474712022047e-05, + "loss": 0.2749, + "step": 18534, + "teacher_loss": 0.2627674639225006 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.28723418712615967, + "learning_rate": 1.219124360347157e-05, + "loss": 0.2381, + "step": 18535, + "teacher_loss": 0.2326916754245758 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.7126603126525879, + "learning_rate": 1.2189012559317459e-05, + "loss": 0.3246, + "step": 18536, + "teacher_loss": 0.28148210048675537 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.5261780023574829, + "learning_rate": 1.2186781579610864e-05, + "loss": 0.3424, + "step": 18537, + "teacher_loss": 0.32196518778800964 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.5825536251068115, + "learning_rate": 1.2184550664402927e-05, + "loss": 0.2129, + "step": 18538, + "teacher_loss": 0.1717773675918579 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.4640715718269348, + "learning_rate": 1.2182319813744807e-05, + "loss": 0.2388, + "step": 18539, + "teacher_loss": 0.21379020810127258 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.17457765340805054, + "learning_rate": 1.2180089027687649e-05, + "loss": 0.189, + "step": 18540, + "teacher_loss": 0.1905689239501953 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.2991122603416443, + "learning_rate": 1.2177858306282585e-05, + "loss": 0.1187, + "step": 18541, + "teacher_loss": 0.09870870411396027 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.3548603057861328, + "learning_rate": 1.217562764958078e-05, + "loss": 0.2726, + "step": 18542, + "teacher_loss": 0.26343834400177 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.7231144905090332, + "learning_rate": 1.2173397057633361e-05, + "loss": 0.244, + "step": 18543, + "teacher_loss": 0.1907651126384735 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.20886202156543732, + "learning_rate": 1.2171166530491466e-05, + "loss": 0.1305, + "step": 18544, + "teacher_loss": 0.12178203463554382 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.5559341907501221, + "learning_rate": 1.2168936068206252e-05, + "loss": 0.2444, + "step": 18545, + "teacher_loss": 0.20973248779773712 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.5921544432640076, + "learning_rate": 1.216670567082884e-05, + "loss": 0.2553, + "step": 18546, + "teacher_loss": 0.2178599238395691 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.6064504384994507, + "learning_rate": 1.2164475338410372e-05, + "loss": 0.3017, + "step": 18547, + "teacher_loss": 0.2678424119949341 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.7721132040023804, + "learning_rate": 1.2162245071001985e-05, + "loss": 0.292, + "step": 18548, + "teacher_loss": 0.23870941996574402 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.29890891909599304, + "learning_rate": 1.216001486865481e-05, + "loss": 0.2652, + "step": 18549, + "teacher_loss": 0.26143068075180054 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.2940792441368103, + "learning_rate": 1.2157784731419979e-05, + "loss": 0.173, + "step": 18550, + "teacher_loss": 0.1595214456319809 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.30452167987823486, + "learning_rate": 1.2155554659348622e-05, + "loss": 0.19, + "step": 18551, + "teacher_loss": 0.1773168444633484 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.731475293636322, + "learning_rate": 1.2153324652491873e-05, + "loss": 0.2693, + "step": 18552, + "teacher_loss": 0.217957004904747 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.35454702377319336, + "learning_rate": 1.2151094710900854e-05, + "loss": 0.2856, + "step": 18553, + "teacher_loss": 0.27796247601509094 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.3648371994495392, + "learning_rate": 1.2148864834626684e-05, + "loss": 0.1973, + "step": 18554, + "teacher_loss": 0.17865684628486633 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.7870327830314636, + "learning_rate": 1.2146635023720504e-05, + "loss": 0.2375, + "step": 18555, + "teacher_loss": 0.17639723420143127 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.8622509241104126, + "learning_rate": 1.2144405278233427e-05, + "loss": 0.5382, + "step": 18556, + "teacher_loss": 0.5021752119064331 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.47104042768478394, + "learning_rate": 1.2142175598216569e-05, + "loss": 0.2151, + "step": 18557, + "teacher_loss": 0.18663811683654785 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.40209949016571045, + "learning_rate": 1.2139945983721063e-05, + "loss": 0.2722, + "step": 18558, + "teacher_loss": 0.2577453553676605 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.27314847707748413, + "learning_rate": 1.2137716434798018e-05, + "loss": 0.1867, + "step": 18559, + "teacher_loss": 0.17710702121257782 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 1.0312864780426025, + "learning_rate": 1.2135486951498552e-05, + "loss": 0.3745, + "step": 18560, + "teacher_loss": 0.30156177282333374 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.942995548248291, + "learning_rate": 1.2133257533873783e-05, + "loss": 0.2822, + "step": 18561, + "teacher_loss": 0.20877647399902344 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.7195024490356445, + "learning_rate": 1.2131028181974827e-05, + "loss": 0.2512, + "step": 18562, + "teacher_loss": 0.19915470480918884 + }, + { + "compression_loss": 0.0, + "epoch": 3.35, + "label_loss": 0.5158272981643677, + "learning_rate": 1.2128798895852787e-05, + "loss": 0.3056, + "step": 18563, + "teacher_loss": 0.28220319747924805 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.17762607336044312, + "learning_rate": 1.2126569675558785e-05, + "loss": 0.1403, + "step": 18564, + "teacher_loss": 0.1361183375120163 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.22994890809059143, + "learning_rate": 1.2124340521143929e-05, + "loss": 0.183, + "step": 18565, + "teacher_loss": 0.1777379810810089 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.7605133652687073, + "learning_rate": 1.212211143265931e-05, + "loss": 0.3544, + "step": 18566, + "teacher_loss": 0.3092246949672699 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.4144997000694275, + "learning_rate": 1.211988241015606e-05, + "loss": 0.2093, + "step": 18567, + "teacher_loss": 0.18645372986793518 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.4535015821456909, + "learning_rate": 1.2117653453685269e-05, + "loss": 0.207, + "step": 18568, + "teacher_loss": 0.17958375811576843 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.5924638509750366, + "learning_rate": 1.2115424563298035e-05, + "loss": 0.2092, + "step": 18569, + "teacher_loss": 0.1665697991847992 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.3411828279495239, + "learning_rate": 1.2113195739045477e-05, + "loss": 0.2037, + "step": 18570, + "teacher_loss": 0.18839582800865173 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.05584712699055672, + "learning_rate": 1.2110966980978682e-05, + "loss": 0.2105, + "step": 18571, + "teacher_loss": 0.22770346701145172 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.8084791898727417, + "learning_rate": 1.210873828914875e-05, + "loss": 0.2323, + "step": 18572, + "teacher_loss": 0.16825725138187408 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.6047141551971436, + "learning_rate": 1.2106509663606783e-05, + "loss": 0.1975, + "step": 18573, + "teacher_loss": 0.1521998941898346 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.5429080724716187, + "learning_rate": 1.2104281104403873e-05, + "loss": 0.2789, + "step": 18574, + "teacher_loss": 0.24952450394630432 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.41864681243896484, + "learning_rate": 1.2102052611591118e-05, + "loss": 0.1989, + "step": 18575, + "teacher_loss": 0.17443254590034485 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.3734470009803772, + "learning_rate": 1.2099824185219603e-05, + "loss": 0.4059, + "step": 18576, + "teacher_loss": 0.40952742099761963 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.350409597158432, + "learning_rate": 1.2097595825340429e-05, + "loss": 0.2692, + "step": 18577, + "teacher_loss": 0.2601744532585144 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.26680731773376465, + "learning_rate": 1.2095367532004685e-05, + "loss": 0.1996, + "step": 18578, + "teacher_loss": 0.19217166304588318 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.4685589671134949, + "learning_rate": 1.2093139305263442e-05, + "loss": 0.2764, + "step": 18579, + "teacher_loss": 0.2550569176673889 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.7648811340332031, + "learning_rate": 1.2090911145167811e-05, + "loss": 0.2192, + "step": 18580, + "teacher_loss": 0.15860819816589355 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.7024475932121277, + "learning_rate": 1.2088683051768863e-05, + "loss": 0.3673, + "step": 18581, + "teacher_loss": 0.3300231695175171 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.2347404658794403, + "learning_rate": 1.208645502511768e-05, + "loss": 0.198, + "step": 18582, + "teacher_loss": 0.19395726919174194 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.48253703117370605, + "learning_rate": 1.2084227065265351e-05, + "loss": 0.2555, + "step": 18583, + "teacher_loss": 0.23032832145690918 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.6801644563674927, + "learning_rate": 1.2081999172262955e-05, + "loss": 0.3115, + "step": 18584, + "teacher_loss": 0.2705579400062561 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.7782101631164551, + "learning_rate": 1.2079771346161564e-05, + "loss": 0.2723, + "step": 18585, + "teacher_loss": 0.2161393165588379 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.30283600091934204, + "learning_rate": 1.2077543587012266e-05, + "loss": 0.1836, + "step": 18586, + "teacher_loss": 0.17035819590091705 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.22044560313224792, + "learning_rate": 1.207531589486613e-05, + "loss": 0.2006, + "step": 18587, + "teacher_loss": 0.1984504610300064 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.2638302147388458, + "learning_rate": 1.2073088269774227e-05, + "loss": 0.1986, + "step": 18588, + "teacher_loss": 0.19139699637889862 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.3105957806110382, + "learning_rate": 1.2070860711787641e-05, + "loss": 0.1705, + "step": 18589, + "teacher_loss": 0.154911607503891 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.5322176218032837, + "learning_rate": 1.2068633220957438e-05, + "loss": 0.2322, + "step": 18590, + "teacher_loss": 0.198826402425766 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.3685023784637451, + "learning_rate": 1.2066405797334679e-05, + "loss": 0.1734, + "step": 18591, + "teacher_loss": 0.1517159640789032 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.8182448148727417, + "learning_rate": 1.2064178440970448e-05, + "loss": 0.3095, + "step": 18592, + "teacher_loss": 0.25293880701065063 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.18817299604415894, + "learning_rate": 1.20619511519158e-05, + "loss": 0.2222, + "step": 18593, + "teacher_loss": 0.22597861289978027 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.4017627537250519, + "learning_rate": 1.2059723930221805e-05, + "loss": 0.2513, + "step": 18594, + "teacher_loss": 0.23456136882305145 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.752432107925415, + "learning_rate": 1.2057496775939519e-05, + "loss": 0.319, + "step": 18595, + "teacher_loss": 0.2708294987678528 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.34299203753471375, + "learning_rate": 1.2055269689120014e-05, + "loss": 0.1983, + "step": 18596, + "teacher_loss": 0.18218111991882324 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.3373622000217438, + "learning_rate": 1.2053042669814347e-05, + "loss": 0.1795, + "step": 18597, + "teacher_loss": 0.16193875670433044 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.630958080291748, + "learning_rate": 1.2050815718073573e-05, + "loss": 0.3137, + "step": 18598, + "teacher_loss": 0.27845728397369385 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.3826289772987366, + "learning_rate": 1.2048588833948755e-05, + "loss": 0.2518, + "step": 18599, + "teacher_loss": 0.23722949624061584 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.5850616097450256, + "learning_rate": 1.2046362017490947e-05, + "loss": 0.6712, + "step": 18600, + "teacher_loss": 0.6807987093925476 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.47760874032974243, + "learning_rate": 1.2044135268751199e-05, + "loss": 0.2563, + "step": 18601, + "teacher_loss": 0.23170697689056396 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.1457206904888153, + "learning_rate": 1.2041908587780571e-05, + "loss": 0.2071, + "step": 18602, + "teacher_loss": 0.21389545500278473 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.27983710169792175, + "learning_rate": 1.2039681974630111e-05, + "loss": 0.2171, + "step": 18603, + "teacher_loss": 0.2100846767425537 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.5690048933029175, + "learning_rate": 1.203745542935086e-05, + "loss": 0.3344, + "step": 18604, + "teacher_loss": 0.3083241879940033 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.2317158281803131, + "learning_rate": 1.2035228951993885e-05, + "loss": 0.165, + "step": 18605, + "teacher_loss": 0.15754428505897522 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.4852762222290039, + "learning_rate": 1.2033002542610216e-05, + "loss": 0.1732, + "step": 18606, + "teacher_loss": 0.1384781152009964 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.5166403651237488, + "learning_rate": 1.20307762012509e-05, + "loss": 0.2531, + "step": 18607, + "teacher_loss": 0.22379574179649353 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.8595681190490723, + "learning_rate": 1.2028549927966987e-05, + "loss": 0.2832, + "step": 18608, + "teacher_loss": 0.219117671251297 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.4757612943649292, + "learning_rate": 1.2026323722809516e-05, + "loss": 0.3655, + "step": 18609, + "teacher_loss": 0.35328495502471924 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.8425443172454834, + "learning_rate": 1.2024097585829522e-05, + "loss": 0.3063, + "step": 18610, + "teacher_loss": 0.24674092233181 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.4537784457206726, + "learning_rate": 1.2021871517078052e-05, + "loss": 0.2742, + "step": 18611, + "teacher_loss": 0.25426623225212097 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.6092991828918457, + "learning_rate": 1.2019645516606139e-05, + "loss": 0.2731, + "step": 18612, + "teacher_loss": 0.23575352132320404 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.6885086297988892, + "learning_rate": 1.2017419584464815e-05, + "loss": 0.238, + "step": 18613, + "teacher_loss": 0.18789339065551758 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.38987648487091064, + "learning_rate": 1.2015193720705123e-05, + "loss": 0.2065, + "step": 18614, + "teacher_loss": 0.18611271679401398 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.39823824167251587, + "learning_rate": 1.201296792537809e-05, + "loss": 0.2441, + "step": 18615, + "teacher_loss": 0.2269316017627716 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.186028853058815, + "learning_rate": 1.2010742198534748e-05, + "loss": 0.1884, + "step": 18616, + "teacher_loss": 0.18868900835514069 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.2704801857471466, + "learning_rate": 1.2008516540226115e-05, + "loss": 0.1735, + "step": 18617, + "teacher_loss": 0.16272449493408203 + }, + { + "compression_loss": 0.0, + "epoch": 3.36, + "label_loss": 0.2526557445526123, + "learning_rate": 1.2006290950503241e-05, + "loss": 0.2194, + "step": 18618, + "teacher_loss": 0.2156938761472702 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.35308438539505005, + "learning_rate": 1.2004065429417136e-05, + "loss": 0.359, + "step": 18619, + "teacher_loss": 0.35966986417770386 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 1.0550552606582642, + "learning_rate": 1.2001839977018822e-05, + "loss": 0.2846, + "step": 18620, + "teacher_loss": 0.19896674156188965 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.2808707654476166, + "learning_rate": 1.1999614593359335e-05, + "loss": 0.2119, + "step": 18621, + "teacher_loss": 0.20427697896957397 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.5573078989982605, + "learning_rate": 1.199738927848969e-05, + "loss": 0.2696, + "step": 18622, + "teacher_loss": 0.23759154975414276 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.2646842896938324, + "learning_rate": 1.1995164032460903e-05, + "loss": 0.1714, + "step": 18623, + "teacher_loss": 0.16099140048027039 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.5229165554046631, + "learning_rate": 1.1992938855323996e-05, + "loss": 0.2148, + "step": 18624, + "teacher_loss": 0.180563822388649 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.8742836713790894, + "learning_rate": 1.1990713747129988e-05, + "loss": 0.3068, + "step": 18625, + "teacher_loss": 0.2437036782503128 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.8426339626312256, + "learning_rate": 1.1988488707929887e-05, + "loss": 0.4661, + "step": 18626, + "teacher_loss": 0.4242169260978699 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.20311963558197021, + "learning_rate": 1.1986263737774717e-05, + "loss": 0.1608, + "step": 18627, + "teacher_loss": 0.15607714653015137 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.32049351930618286, + "learning_rate": 1.1984038836715484e-05, + "loss": 0.2359, + "step": 18628, + "teacher_loss": 0.2264769971370697 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.41010230779647827, + "learning_rate": 1.1981814004803191e-05, + "loss": 0.2951, + "step": 18629, + "teacher_loss": 0.28229087591171265 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.6663646697998047, + "learning_rate": 1.1979589242088862e-05, + "loss": 0.313, + "step": 18630, + "teacher_loss": 0.2736872732639313 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.6314862370491028, + "learning_rate": 1.1977364548623492e-05, + "loss": 0.2767, + "step": 18631, + "teacher_loss": 0.237329363822937 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.4850807785987854, + "learning_rate": 1.197513992445809e-05, + "loss": 0.2486, + "step": 18632, + "teacher_loss": 0.2223183512687683 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.5129311084747314, + "learning_rate": 1.1972915369643662e-05, + "loss": 0.218, + "step": 18633, + "teacher_loss": 0.1852179914712906 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.4080178439617157, + "learning_rate": 1.1970690884231209e-05, + "loss": 0.2027, + "step": 18634, + "teacher_loss": 0.17984004318714142 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.6929551959037781, + "learning_rate": 1.196846646827173e-05, + "loss": 0.2944, + "step": 18635, + "teacher_loss": 0.25014016032218933 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.5995739698410034, + "learning_rate": 1.1966242121816223e-05, + "loss": 0.3651, + "step": 18636, + "teacher_loss": 0.3390834331512451 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.787972092628479, + "learning_rate": 1.1964017844915694e-05, + "loss": 0.2312, + "step": 18637, + "teacher_loss": 0.16933681070804596 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.5746564269065857, + "learning_rate": 1.1961793637621136e-05, + "loss": 0.2216, + "step": 18638, + "teacher_loss": 0.18241187930107117 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.4892458915710449, + "learning_rate": 1.1959569499983532e-05, + "loss": 0.1803, + "step": 18639, + "teacher_loss": 0.14594025909900665 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.7154369950294495, + "learning_rate": 1.1957345432053891e-05, + "loss": 0.237, + "step": 18640, + "teacher_loss": 0.18384280800819397 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.2722594141960144, + "learning_rate": 1.1955121433883197e-05, + "loss": 0.2094, + "step": 18641, + "teacher_loss": 0.2023903876543045 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.2032523900270462, + "learning_rate": 1.1952897505522431e-05, + "loss": 0.1666, + "step": 18642, + "teacher_loss": 0.16253966093063354 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.40038394927978516, + "learning_rate": 1.1950673647022597e-05, + "loss": 0.348, + "step": 18643, + "teacher_loss": 0.34214136004447937 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 1.0477972030639648, + "learning_rate": 1.1948449858434673e-05, + "loss": 0.2337, + "step": 18644, + "teacher_loss": 0.143270805478096 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.3286711871623993, + "learning_rate": 1.1946226139809643e-05, + "loss": 0.2434, + "step": 18645, + "teacher_loss": 0.23391762375831604 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 1.3002586364746094, + "learning_rate": 1.1944002491198492e-05, + "loss": 0.4167, + "step": 18646, + "teacher_loss": 0.3184717297554016 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.35456693172454834, + "learning_rate": 1.1941778912652205e-05, + "loss": 0.2283, + "step": 18647, + "teacher_loss": 0.2142752707004547 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.6176414489746094, + "learning_rate": 1.1939555404221753e-05, + "loss": 0.2454, + "step": 18648, + "teacher_loss": 0.2040928602218628 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.8169912099838257, + "learning_rate": 1.1937331965958125e-05, + "loss": 0.2681, + "step": 18649, + "teacher_loss": 0.20716747641563416 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.4531940817832947, + "learning_rate": 1.193510859791229e-05, + "loss": 0.226, + "step": 18650, + "teacher_loss": 0.20073404908180237 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.44506001472473145, + "learning_rate": 1.1932885300135223e-05, + "loss": 0.4049, + "step": 18651, + "teacher_loss": 0.40047693252563477 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.507017195224762, + "learning_rate": 1.1930662072677904e-05, + "loss": 0.2208, + "step": 18652, + "teacher_loss": 0.18898217380046844 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.24870118498802185, + "learning_rate": 1.1928438915591304e-05, + "loss": 0.2632, + "step": 18653, + "teacher_loss": 0.26480260491371155 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.24229568243026733, + "learning_rate": 1.1926215828926384e-05, + "loss": 0.207, + "step": 18654, + "teacher_loss": 0.20306628942489624 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.582056999206543, + "learning_rate": 1.1923992812734126e-05, + "loss": 0.2198, + "step": 18655, + "teacher_loss": 0.17957313358783722 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.5569716691970825, + "learning_rate": 1.1921769867065487e-05, + "loss": 0.2719, + "step": 18656, + "teacher_loss": 0.2402779459953308 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.5424741506576538, + "learning_rate": 1.1919546991971437e-05, + "loss": 0.3, + "step": 18657, + "teacher_loss": 0.2730475664138794 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.7920173406600952, + "learning_rate": 1.1917324187502936e-05, + "loss": 0.3083, + "step": 18658, + "teacher_loss": 0.25450199842453003 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.5328412055969238, + "learning_rate": 1.1915101453710953e-05, + "loss": 0.2336, + "step": 18659, + "teacher_loss": 0.20040258765220642 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 1.009603500366211, + "learning_rate": 1.1912878790646444e-05, + "loss": 0.3899, + "step": 18660, + "teacher_loss": 0.32107990980148315 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.4386749863624573, + "learning_rate": 1.1910656198360363e-05, + "loss": 0.2409, + "step": 18661, + "teacher_loss": 0.21890109777450562 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.21110102534294128, + "learning_rate": 1.1908433676903679e-05, + "loss": 0.1698, + "step": 18662, + "teacher_loss": 0.165176659822464 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.508363664150238, + "learning_rate": 1.1906211226327346e-05, + "loss": 0.2648, + "step": 18663, + "teacher_loss": 0.23777025938034058 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.5425838232040405, + "learning_rate": 1.1903988846682301e-05, + "loss": 0.2806, + "step": 18664, + "teacher_loss": 0.25153613090515137 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.6043205261230469, + "learning_rate": 1.1901766538019521e-05, + "loss": 0.5395, + "step": 18665, + "teacher_loss": 0.532261848449707 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.7202823162078857, + "learning_rate": 1.1899544300389942e-05, + "loss": 0.2975, + "step": 18666, + "teacher_loss": 0.2504733204841614 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.5415602922439575, + "learning_rate": 1.189732213384451e-05, + "loss": 0.2531, + "step": 18667, + "teacher_loss": 0.22105497121810913 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.5099426507949829, + "learning_rate": 1.1895100038434188e-05, + "loss": 0.2872, + "step": 18668, + "teacher_loss": 0.26243531703948975 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.3420504033565521, + "learning_rate": 1.189287801420991e-05, + "loss": 0.2183, + "step": 18669, + "teacher_loss": 0.20453864336013794 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.6645707488059998, + "learning_rate": 1.189065606122262e-05, + "loss": 0.3154, + "step": 18670, + "teacher_loss": 0.27655598521232605 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.7597312927246094, + "learning_rate": 1.1888434179523269e-05, + "loss": 0.25, + "step": 18671, + "teacher_loss": 0.1933150291442871 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.7268742322921753, + "learning_rate": 1.1886212369162794e-05, + "loss": 0.2375, + "step": 18672, + "teacher_loss": 0.183174729347229 + }, + { + "compression_loss": 0.0, + "epoch": 3.37, + "label_loss": 0.6598705053329468, + "learning_rate": 1.1883990630192128e-05, + "loss": 0.23, + "step": 18673, + "teacher_loss": 0.18221454322338104 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.345536470413208, + "learning_rate": 1.1881768962662219e-05, + "loss": 0.1599, + "step": 18674, + "teacher_loss": 0.13929212093353271 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.8778254389762878, + "learning_rate": 1.1879547366623999e-05, + "loss": 0.2356, + "step": 18675, + "teacher_loss": 0.16423200070858002 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.14495152235031128, + "learning_rate": 1.1877325842128398e-05, + "loss": 0.1695, + "step": 18676, + "teacher_loss": 0.17222189903259277 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.517376184463501, + "learning_rate": 1.1875104389226357e-05, + "loss": 0.3108, + "step": 18677, + "teacher_loss": 0.28788653016090393 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 1.1780054569244385, + "learning_rate": 1.1872883007968808e-05, + "loss": 0.2984, + "step": 18678, + "teacher_loss": 0.20069104433059692 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.5623835921287537, + "learning_rate": 1.1870661698406671e-05, + "loss": 0.2576, + "step": 18679, + "teacher_loss": 0.22378107905387878 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.1455666869878769, + "learning_rate": 1.1868440460590876e-05, + "loss": 0.1617, + "step": 18680, + "teacher_loss": 0.16349145770072937 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.19573749601840973, + "learning_rate": 1.1866219294572357e-05, + "loss": 0.2109, + "step": 18681, + "teacher_loss": 0.2126309871673584 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.5442487001419067, + "learning_rate": 1.1863998200402032e-05, + "loss": 0.3475, + "step": 18682, + "teacher_loss": 0.32569241523742676 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.4690822958946228, + "learning_rate": 1.1861777178130823e-05, + "loss": 0.2952, + "step": 18683, + "teacher_loss": 0.2759130299091339 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.33882611989974976, + "learning_rate": 1.1859556227809659e-05, + "loss": 0.1751, + "step": 18684, + "teacher_loss": 0.15694256126880646 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.8876219987869263, + "learning_rate": 1.1857335349489455e-05, + "loss": 0.2238, + "step": 18685, + "teacher_loss": 0.1500946581363678 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.4225117266178131, + "learning_rate": 1.1855114543221126e-05, + "loss": 0.2922, + "step": 18686, + "teacher_loss": 0.2777276039123535 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.3716357350349426, + "learning_rate": 1.1852893809055594e-05, + "loss": 0.1932, + "step": 18687, + "teacher_loss": 0.17339164018630981 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.45414185523986816, + "learning_rate": 1.1850673147043776e-05, + "loss": 0.194, + "step": 18688, + "teacher_loss": 0.16507622599601746 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.36255526542663574, + "learning_rate": 1.184845255723657e-05, + "loss": 0.1637, + "step": 18689, + "teacher_loss": 0.14162112772464752 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 1.1253622770309448, + "learning_rate": 1.1846232039684907e-05, + "loss": 0.2705, + "step": 18690, + "teacher_loss": 0.1755523681640625 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.3908260464668274, + "learning_rate": 1.1844011594439685e-05, + "loss": 0.2289, + "step": 18691, + "teacher_loss": 0.21087965369224548 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.5511235594749451, + "learning_rate": 1.1841791221551809e-05, + "loss": 0.2649, + "step": 18692, + "teacher_loss": 0.23314881324768066 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.5060559511184692, + "learning_rate": 1.1839570921072201e-05, + "loss": 0.2539, + "step": 18693, + "teacher_loss": 0.2258489429950714 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.4552042484283447, + "learning_rate": 1.1837350693051754e-05, + "loss": 0.2167, + "step": 18694, + "teacher_loss": 0.1902216225862503 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.28415727615356445, + "learning_rate": 1.1835130537541369e-05, + "loss": 0.2435, + "step": 18695, + "teacher_loss": 0.23896238207817078 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.6941683888435364, + "learning_rate": 1.1832910454591956e-05, + "loss": 0.2748, + "step": 18696, + "teacher_loss": 0.2282366305589676 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 1.0125057697296143, + "learning_rate": 1.183069044425441e-05, + "loss": 0.4065, + "step": 18697, + "teacher_loss": 0.3391650319099426 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.5119051933288574, + "learning_rate": 1.1828470506579631e-05, + "loss": 0.2326, + "step": 18698, + "teacher_loss": 0.20162129402160645 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.5098068714141846, + "learning_rate": 1.182625064161851e-05, + "loss": 0.26, + "step": 18699, + "teacher_loss": 0.23221921920776367 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.24741458892822266, + "learning_rate": 1.182403084942195e-05, + "loss": 0.2844, + "step": 18700, + "teacher_loss": 0.28849589824676514 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.751213550567627, + "learning_rate": 1.1821811130040844e-05, + "loss": 0.3532, + "step": 18701, + "teacher_loss": 0.30895668268203735 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.34117844700813293, + "learning_rate": 1.1819591483526073e-05, + "loss": 0.1769, + "step": 18702, + "teacher_loss": 0.15861183404922485 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.4234802722930908, + "learning_rate": 1.181737190992854e-05, + "loss": 0.2672, + "step": 18703, + "teacher_loss": 0.24980738759040833 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.2781968116760254, + "learning_rate": 1.1815152409299126e-05, + "loss": 0.2325, + "step": 18704, + "teacher_loss": 0.22739183902740479 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.33883753418922424, + "learning_rate": 1.1812932981688715e-05, + "loss": 0.2382, + "step": 18705, + "teacher_loss": 0.22704745829105377 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.3931272029876709, + "learning_rate": 1.1810713627148196e-05, + "loss": 0.2447, + "step": 18706, + "teacher_loss": 0.22826120257377625 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.6611278057098389, + "learning_rate": 1.1808494345728455e-05, + "loss": 0.2757, + "step": 18707, + "teacher_loss": 0.23289810121059418 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.39021313190460205, + "learning_rate": 1.1806275137480365e-05, + "loss": 0.2206, + "step": 18708, + "teacher_loss": 0.20180051028728485 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.5294166803359985, + "learning_rate": 1.1804056002454814e-05, + "loss": 0.2582, + "step": 18709, + "teacher_loss": 0.22805221378803253 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.35141512751579285, + "learning_rate": 1.1801836940702678e-05, + "loss": 0.2072, + "step": 18710, + "teacher_loss": 0.19123144447803497 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.5447932481765747, + "learning_rate": 1.1799617952274829e-05, + "loss": 0.264, + "step": 18711, + "teacher_loss": 0.23275363445281982 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.46187686920166016, + "learning_rate": 1.1797399037222148e-05, + "loss": 0.2268, + "step": 18712, + "teacher_loss": 0.20068825781345367 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.6060706377029419, + "learning_rate": 1.1795180195595509e-05, + "loss": 0.2154, + "step": 18713, + "teacher_loss": 0.17198437452316284 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.5784647464752197, + "learning_rate": 1.1792961427445768e-05, + "loss": 0.2095, + "step": 18714, + "teacher_loss": 0.16853109002113342 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.6584532856941223, + "learning_rate": 1.1790742732823818e-05, + "loss": 0.3387, + "step": 18715, + "teacher_loss": 0.30313533544540405 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.5176960825920105, + "learning_rate": 1.178852411178051e-05, + "loss": 0.2262, + "step": 18716, + "teacher_loss": 0.1938420534133911 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.4572313725948334, + "learning_rate": 1.178630556436671e-05, + "loss": 0.2197, + "step": 18717, + "teacher_loss": 0.1933315098285675 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.6383875608444214, + "learning_rate": 1.17840870906333e-05, + "loss": 0.2734, + "step": 18718, + "teacher_loss": 0.2328152358531952 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.4089704155921936, + "learning_rate": 1.1781868690631125e-05, + "loss": 0.4248, + "step": 18719, + "teacher_loss": 0.42656123638153076 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.44494473934173584, + "learning_rate": 1.1779650364411055e-05, + "loss": 0.2041, + "step": 18720, + "teacher_loss": 0.17730557918548584 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.6373677253723145, + "learning_rate": 1.1777432112023942e-05, + "loss": 0.3568, + "step": 18721, + "teacher_loss": 0.32558226585388184 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.4969397783279419, + "learning_rate": 1.1775213933520653e-05, + "loss": 0.3057, + "step": 18722, + "teacher_loss": 0.28441283106803894 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.5830061435699463, + "learning_rate": 1.1772995828952038e-05, + "loss": 0.2813, + "step": 18723, + "teacher_loss": 0.24774686992168427 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.6131986975669861, + "learning_rate": 1.1770777798368954e-05, + "loss": 0.3266, + "step": 18724, + "teacher_loss": 0.2947281002998352 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.4338865876197815, + "learning_rate": 1.1768559841822254e-05, + "loss": 0.291, + "step": 18725, + "teacher_loss": 0.27515262365341187 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.5621605515480042, + "learning_rate": 1.1766341959362792e-05, + "loss": 0.2911, + "step": 18726, + "teacher_loss": 0.2609741687774658 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.4560585618019104, + "learning_rate": 1.1764124151041406e-05, + "loss": 0.2429, + "step": 18727, + "teacher_loss": 0.21925197541713715 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.18669575452804565, + "learning_rate": 1.1761906416908959e-05, + "loss": 0.152, + "step": 18728, + "teacher_loss": 0.14810970425605774 + }, + { + "compression_loss": 0.0, + "epoch": 3.38, + "label_loss": 0.3419337272644043, + "learning_rate": 1.1759688757016288e-05, + "loss": 0.2104, + "step": 18729, + "teacher_loss": 0.19575850665569305 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.4429096579551697, + "learning_rate": 1.1757471171414235e-05, + "loss": 0.2262, + "step": 18730, + "teacher_loss": 0.20216958224773407 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.4647054970264435, + "learning_rate": 1.175525366015365e-05, + "loss": 0.2032, + "step": 18731, + "teacher_loss": 0.1740989089012146 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.2213570773601532, + "learning_rate": 1.175303622328537e-05, + "loss": 0.2293, + "step": 18732, + "teacher_loss": 0.23013561964035034 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.34916478395462036, + "learning_rate": 1.175081886086023e-05, + "loss": 0.1751, + "step": 18733, + "teacher_loss": 0.15580236911773682 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.30279654264450073, + "learning_rate": 1.1748601572929076e-05, + "loss": 0.1496, + "step": 18734, + "teacher_loss": 0.13256222009658813 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.4584543704986572, + "learning_rate": 1.1746384359542741e-05, + "loss": 0.2553, + "step": 18735, + "teacher_loss": 0.23273366689682007 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.42151910066604614, + "learning_rate": 1.1744167220752053e-05, + "loss": 0.2476, + "step": 18736, + "teacher_loss": 0.22827471792697906 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.5823575258255005, + "learning_rate": 1.1741950156607852e-05, + "loss": 0.2555, + "step": 18737, + "teacher_loss": 0.2191745489835739 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.18476945161819458, + "learning_rate": 1.173973316716097e-05, + "loss": 0.1987, + "step": 18738, + "teacher_loss": 0.20022518932819366 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.7398821115493774, + "learning_rate": 1.173751625246223e-05, + "loss": 0.7045, + "step": 18739, + "teacher_loss": 0.7005237936973572 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.3821154534816742, + "learning_rate": 1.1735299412562455e-05, + "loss": 0.2091, + "step": 18740, + "teacher_loss": 0.18984994292259216 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.30538034439086914, + "learning_rate": 1.173308264751248e-05, + "loss": 0.2451, + "step": 18741, + "teacher_loss": 0.2384234070777893 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.2657424211502075, + "learning_rate": 1.1730865957363128e-05, + "loss": 0.2131, + "step": 18742, + "teacher_loss": 0.20729003846645355 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.24790242314338684, + "learning_rate": 1.1728649342165212e-05, + "loss": 0.1868, + "step": 18743, + "teacher_loss": 0.18000148236751556 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.5210214853286743, + "learning_rate": 1.1726432801969562e-05, + "loss": 0.2818, + "step": 18744, + "teacher_loss": 0.2552551329135895 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.5517781376838684, + "learning_rate": 1.1724216336826994e-05, + "loss": 0.2443, + "step": 18745, + "teacher_loss": 0.2101515382528305 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.32161837816238403, + "learning_rate": 1.172199994678832e-05, + "loss": 0.2124, + "step": 18746, + "teacher_loss": 0.20023053884506226 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.6778246164321899, + "learning_rate": 1.1719783631904364e-05, + "loss": 0.2216, + "step": 18747, + "teacher_loss": 0.17085419595241547 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.2519404888153076, + "learning_rate": 1.1717567392225934e-05, + "loss": 0.177, + "step": 18748, + "teacher_loss": 0.1687122881412506 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.18910522758960724, + "learning_rate": 1.1715351227803838e-05, + "loss": 0.1924, + "step": 18749, + "teacher_loss": 0.1927390992641449 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.5733964443206787, + "learning_rate": 1.1713135138688894e-05, + "loss": 0.5271, + "step": 18750, + "teacher_loss": 0.521993100643158 + }, + { + "epoch": 3.39, + "eval_exact_match": 79.99053926206244, + "eval_f1": 87.33547061003178, + "step": 18750 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.2888983488082886, + "learning_rate": 1.1710919124931912e-05, + "loss": 0.2211, + "step": 18751, + "teacher_loss": 0.21358919143676758 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.14913861453533173, + "learning_rate": 1.1708703186583682e-05, + "loss": 0.1729, + "step": 18752, + "teacher_loss": 0.17550814151763916 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.17416149377822876, + "learning_rate": 1.1706487323695032e-05, + "loss": 0.2119, + "step": 18753, + "teacher_loss": 0.21605046093463898 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.5031924843788147, + "learning_rate": 1.1704271536316747e-05, + "loss": 0.2446, + "step": 18754, + "teacher_loss": 0.21586453914642334 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 1.1033008098602295, + "learning_rate": 1.1702055824499634e-05, + "loss": 0.393, + "step": 18755, + "teacher_loss": 0.3140692710876465 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.614800214767456, + "learning_rate": 1.1699840188294493e-05, + "loss": 0.4199, + "step": 18756, + "teacher_loss": 0.39829424023628235 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.3757333755493164, + "learning_rate": 1.1697624627752125e-05, + "loss": 0.1729, + "step": 18757, + "teacher_loss": 0.15040722489356995 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.3248414397239685, + "learning_rate": 1.169540914292332e-05, + "loss": 0.1741, + "step": 18758, + "teacher_loss": 0.15730169415473938 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.716118335723877, + "learning_rate": 1.1693193733858877e-05, + "loss": 0.2567, + "step": 18759, + "teacher_loss": 0.20560383796691895 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.21993760764598846, + "learning_rate": 1.169097840060959e-05, + "loss": 0.1474, + "step": 18760, + "teacher_loss": 0.1393035352230072 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.46032804250717163, + "learning_rate": 1.1688763143226247e-05, + "loss": 0.1968, + "step": 18761, + "teacher_loss": 0.16754159331321716 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.5101932287216187, + "learning_rate": 1.1686547961759629e-05, + "loss": 0.2438, + "step": 18762, + "teacher_loss": 0.21421505510807037 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.2799411416053772, + "learning_rate": 1.1684332856260543e-05, + "loss": 0.1226, + "step": 18763, + "teacher_loss": 0.10516917705535889 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.7636371850967407, + "learning_rate": 1.1682117826779759e-05, + "loss": 0.5698, + "step": 18764, + "teacher_loss": 0.5483071208000183 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.32063624262809753, + "learning_rate": 1.167990287336806e-05, + "loss": 0.2153, + "step": 18765, + "teacher_loss": 0.2036440670490265 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.2634209096431732, + "learning_rate": 1.1677687996076244e-05, + "loss": 0.1549, + "step": 18766, + "teacher_loss": 0.14281633496284485 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.2867833077907562, + "learning_rate": 1.1675473194955078e-05, + "loss": 0.1782, + "step": 18767, + "teacher_loss": 0.16610780358314514 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.3051109313964844, + "learning_rate": 1.1673258470055341e-05, + "loss": 0.213, + "step": 18768, + "teacher_loss": 0.20281875133514404 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.3268401622772217, + "learning_rate": 1.1671043821427817e-05, + "loss": 0.2527, + "step": 18769, + "teacher_loss": 0.24445046484470367 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.9010107517242432, + "learning_rate": 1.1668829249123276e-05, + "loss": 0.673, + "step": 18770, + "teacher_loss": 0.6476960778236389 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.6698606014251709, + "learning_rate": 1.166661475319249e-05, + "loss": 0.2396, + "step": 18771, + "teacher_loss": 0.19180506467819214 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.2742948532104492, + "learning_rate": 1.1664400333686237e-05, + "loss": 0.1782, + "step": 18772, + "teacher_loss": 0.16748803853988647 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.8050655126571655, + "learning_rate": 1.1662185990655285e-05, + "loss": 0.2999, + "step": 18773, + "teacher_loss": 0.2437790334224701 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.4336487650871277, + "learning_rate": 1.1659971724150397e-05, + "loss": 0.2502, + "step": 18774, + "teacher_loss": 0.22984032332897186 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.2619550824165344, + "learning_rate": 1.1657757534222346e-05, + "loss": 0.1686, + "step": 18775, + "teacher_loss": 0.15818355977535248 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.3273872137069702, + "learning_rate": 1.1655543420921898e-05, + "loss": 0.2071, + "step": 18776, + "teacher_loss": 0.19372232258319855 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.5201416015625, + "learning_rate": 1.1653329384299804e-05, + "loss": 0.2211, + "step": 18777, + "teacher_loss": 0.18786412477493286 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.5441088080406189, + "learning_rate": 1.165111542440684e-05, + "loss": 0.2664, + "step": 18778, + "teacher_loss": 0.2355455756187439 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.18394622206687927, + "learning_rate": 1.1648901541293758e-05, + "loss": 0.1833, + "step": 18779, + "teacher_loss": 0.18328258395195007 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.1553734540939331, + "learning_rate": 1.1646687735011311e-05, + "loss": 0.1751, + "step": 18780, + "teacher_loss": 0.17725330591201782 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.2886493504047394, + "learning_rate": 1.1644474005610266e-05, + "loss": 0.2365, + "step": 18781, + "teacher_loss": 0.2307392954826355 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.2710658311843872, + "learning_rate": 1.164226035314137e-05, + "loss": 0.1925, + "step": 18782, + "teacher_loss": 0.1837441474199295 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.4072994589805603, + "learning_rate": 1.1640046777655378e-05, + "loss": 0.2129, + "step": 18783, + "teacher_loss": 0.19130247831344604 + }, + { + "compression_loss": 0.0, + "epoch": 3.39, + "label_loss": 0.2973078787326813, + "learning_rate": 1.1637833279203036e-05, + "loss": 0.2579, + "step": 18784, + "teacher_loss": 0.2534712255001068 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.4799741804599762, + "learning_rate": 1.1635619857835102e-05, + "loss": 0.3005, + "step": 18785, + "teacher_loss": 0.28051507472991943 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.3238498270511627, + "learning_rate": 1.1633406513602318e-05, + "loss": 0.2517, + "step": 18786, + "teacher_loss": 0.24368642270565033 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.5011175870895386, + "learning_rate": 1.1631193246555422e-05, + "loss": 0.2745, + "step": 18787, + "teacher_loss": 0.2493211328983307 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.3754923641681671, + "learning_rate": 1.1628980056745172e-05, + "loss": 0.1807, + "step": 18788, + "teacher_loss": 0.1590133011341095 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.23842690885066986, + "learning_rate": 1.16267669442223e-05, + "loss": 0.1986, + "step": 18789, + "teacher_loss": 0.19420309364795685 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.24969080090522766, + "learning_rate": 1.1624553909037541e-05, + "loss": 0.1678, + "step": 18790, + "teacher_loss": 0.158721923828125 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.6991925835609436, + "learning_rate": 1.1622340951241652e-05, + "loss": 0.2062, + "step": 18791, + "teacher_loss": 0.15144211053848267 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.2693486511707306, + "learning_rate": 1.1620128070885354e-05, + "loss": 0.2945, + "step": 18792, + "teacher_loss": 0.2973131537437439 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.908923327922821, + "learning_rate": 1.1617915268019385e-05, + "loss": 0.3342, + "step": 18793, + "teacher_loss": 0.2703251838684082 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.3230920135974884, + "learning_rate": 1.161570254269448e-05, + "loss": 0.1594, + "step": 18794, + "teacher_loss": 0.14115728437900543 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.757338285446167, + "learning_rate": 1.161348989496137e-05, + "loss": 0.2312, + "step": 18795, + "teacher_loss": 0.17272251844406128 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.4868299961090088, + "learning_rate": 1.1611277324870778e-05, + "loss": 0.3613, + "step": 18796, + "teacher_loss": 0.3473135828971863 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.32114261388778687, + "learning_rate": 1.1609064832473442e-05, + "loss": 0.2353, + "step": 18797, + "teacher_loss": 0.22577497363090515 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.34793853759765625, + "learning_rate": 1.1606852417820085e-05, + "loss": 0.1965, + "step": 18798, + "teacher_loss": 0.1796720325946808 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.20068097114562988, + "learning_rate": 1.1604640080961422e-05, + "loss": 0.2359, + "step": 18799, + "teacher_loss": 0.2398664951324463 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.3314172327518463, + "learning_rate": 1.1602427821948187e-05, + "loss": 0.1943, + "step": 18800, + "teacher_loss": 0.17908601462841034 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.48195499181747437, + "learning_rate": 1.1600215640831098e-05, + "loss": 0.2332, + "step": 18801, + "teacher_loss": 0.2056141197681427 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.2318498194217682, + "learning_rate": 1.159800353766087e-05, + "loss": 0.1417, + "step": 18802, + "teacher_loss": 0.1316973865032196 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.4067096710205078, + "learning_rate": 1.1595791512488213e-05, + "loss": 0.2447, + "step": 18803, + "teacher_loss": 0.226697638630867 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.6279998421669006, + "learning_rate": 1.159357956536386e-05, + "loss": 0.2522, + "step": 18804, + "teacher_loss": 0.21049439907073975 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.6382004022598267, + "learning_rate": 1.1591367696338512e-05, + "loss": 0.2299, + "step": 18805, + "teacher_loss": 0.18451760709285736 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.4014244079589844, + "learning_rate": 1.1589155905462878e-05, + "loss": 0.2257, + "step": 18806, + "teacher_loss": 0.20615805685520172 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.28749698400497437, + "learning_rate": 1.1586944192787678e-05, + "loss": 0.271, + "step": 18807, + "teacher_loss": 0.26920080184936523 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.6083661317825317, + "learning_rate": 1.1584732558363613e-05, + "loss": 0.291, + "step": 18808, + "teacher_loss": 0.25577524304389954 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.39062899351119995, + "learning_rate": 1.158252100224139e-05, + "loss": 0.196, + "step": 18809, + "teacher_loss": 0.17433369159698486 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.6228368878364563, + "learning_rate": 1.1580309524471718e-05, + "loss": 0.2506, + "step": 18810, + "teacher_loss": 0.20923474431037903 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 1.1299842596054077, + "learning_rate": 1.1578098125105297e-05, + "loss": 0.3197, + "step": 18811, + "teacher_loss": 0.2296578735113144 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.41368335485458374, + "learning_rate": 1.1575886804192816e-05, + "loss": 0.4057, + "step": 18812, + "teacher_loss": 0.4048406481742859 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.7156052589416504, + "learning_rate": 1.1573675561784998e-05, + "loss": 0.2779, + "step": 18813, + "teacher_loss": 0.22924241423606873 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.4381047785282135, + "learning_rate": 1.157146439793252e-05, + "loss": 0.1563, + "step": 18814, + "teacher_loss": 0.1250041127204895 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.4445344805717468, + "learning_rate": 1.156925331268608e-05, + "loss": 0.2076, + "step": 18815, + "teacher_loss": 0.18125315010547638 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.272841215133667, + "learning_rate": 1.1567042306096383e-05, + "loss": 0.1866, + "step": 18816, + "teacher_loss": 0.1770443618297577 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.5708388090133667, + "learning_rate": 1.1564831378214112e-05, + "loss": 0.1979, + "step": 18817, + "teacher_loss": 0.15647542476654053 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.21256472170352936, + "learning_rate": 1.1562620529089955e-05, + "loss": 0.1946, + "step": 18818, + "teacher_loss": 0.19255292415618896 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.5720242261886597, + "learning_rate": 1.1560409758774606e-05, + "loss": 0.3007, + "step": 18819, + "teacher_loss": 0.2705628275871277 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.37271326780319214, + "learning_rate": 1.155819906731875e-05, + "loss": 0.2058, + "step": 18820, + "teacher_loss": 0.18724578619003296 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.5156081914901733, + "learning_rate": 1.1555988454773065e-05, + "loss": 0.2233, + "step": 18821, + "teacher_loss": 0.19086718559265137 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.46330782771110535, + "learning_rate": 1.1553777921188244e-05, + "loss": 0.2426, + "step": 18822, + "teacher_loss": 0.2180497646331787 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.36449992656707764, + "learning_rate": 1.1551567466614961e-05, + "loss": 0.2528, + "step": 18823, + "teacher_loss": 0.24043524265289307 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.4515899419784546, + "learning_rate": 1.1549357091103902e-05, + "loss": 0.2779, + "step": 18824, + "teacher_loss": 0.2586180865764618 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.38402336835861206, + "learning_rate": 1.154714679470573e-05, + "loss": 0.2219, + "step": 18825, + "teacher_loss": 0.20390555262565613 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.17274367809295654, + "learning_rate": 1.1544936577471138e-05, + "loss": 0.2022, + "step": 18826, + "teacher_loss": 0.20545849204063416 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.16242387890815735, + "learning_rate": 1.1542726439450788e-05, + "loss": 0.2131, + "step": 18827, + "teacher_loss": 0.218703031539917 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.49619704484939575, + "learning_rate": 1.1540516380695354e-05, + "loss": 0.4296, + "step": 18828, + "teacher_loss": 0.4221862256526947 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.4356083571910858, + "learning_rate": 1.153830640125551e-05, + "loss": 0.2038, + "step": 18829, + "teacher_loss": 0.17801763117313385 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.3323622941970825, + "learning_rate": 1.1536096501181921e-05, + "loss": 0.2902, + "step": 18830, + "teacher_loss": 0.28548404574394226 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.9321970343589783, + "learning_rate": 1.153388668052525e-05, + "loss": 0.2885, + "step": 18831, + "teacher_loss": 0.2169387936592102 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.2834605574607849, + "learning_rate": 1.153167693933617e-05, + "loss": 0.1637, + "step": 18832, + "teacher_loss": 0.15038307011127472 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.7191187739372253, + "learning_rate": 1.1529467277665339e-05, + "loss": 0.2987, + "step": 18833, + "teacher_loss": 0.25203484296798706 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.47329574823379517, + "learning_rate": 1.1527257695563415e-05, + "loss": 0.2553, + "step": 18834, + "teacher_loss": 0.23102495074272156 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.29870399832725525, + "learning_rate": 1.1525048193081062e-05, + "loss": 0.3432, + "step": 18835, + "teacher_loss": 0.34815606474876404 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.6253160238265991, + "learning_rate": 1.1522838770268941e-05, + "loss": 0.226, + "step": 18836, + "teacher_loss": 0.18161892890930176 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.3708431124687195, + "learning_rate": 1.1520629427177691e-05, + "loss": 0.2483, + "step": 18837, + "teacher_loss": 0.23467296361923218 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.42725205421447754, + "learning_rate": 1.1518420163857984e-05, + "loss": 0.2034, + "step": 18838, + "teacher_loss": 0.178545743227005 + }, + { + "compression_loss": 0.0, + "epoch": 3.4, + "label_loss": 0.14623664319515228, + "learning_rate": 1.1516210980360465e-05, + "loss": 0.1797, + "step": 18839, + "teacher_loss": 0.1834130734205246 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.22204023599624634, + "learning_rate": 1.1514001876735775e-05, + "loss": 0.1841, + "step": 18840, + "teacher_loss": 0.17983055114746094 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.7856538891792297, + "learning_rate": 1.151179285303458e-05, + "loss": 0.2888, + "step": 18841, + "teacher_loss": 0.23359917104244232 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.42608118057250977, + "learning_rate": 1.1509583909307513e-05, + "loss": 0.2135, + "step": 18842, + "teacher_loss": 0.18987855315208435 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.7350302338600159, + "learning_rate": 1.1507375045605224e-05, + "loss": 0.2659, + "step": 18843, + "teacher_loss": 0.2137278914451599 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.4052891731262207, + "learning_rate": 1.150516626197835e-05, + "loss": 0.1503, + "step": 18844, + "teacher_loss": 0.12199300527572632 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 1.2065277099609375, + "learning_rate": 1.1502957558477537e-05, + "loss": 0.2977, + "step": 18845, + "teacher_loss": 0.19666868448257446 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.4308619201183319, + "learning_rate": 1.1500748935153423e-05, + "loss": 0.2302, + "step": 18846, + "teacher_loss": 0.20790283381938934 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.2771860957145691, + "learning_rate": 1.1498540392056645e-05, + "loss": 0.1321, + "step": 18847, + "teacher_loss": 0.1160154789686203 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.4189469814300537, + "learning_rate": 1.1496331929237837e-05, + "loss": 0.2548, + "step": 18848, + "teacher_loss": 0.23658421635627747 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.12749043107032776, + "learning_rate": 1.1494123546747638e-05, + "loss": 0.1811, + "step": 18849, + "teacher_loss": 0.18701088428497314 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.32508015632629395, + "learning_rate": 1.1491915244636665e-05, + "loss": 0.2438, + "step": 18850, + "teacher_loss": 0.2347555309534073 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.6691660284996033, + "learning_rate": 1.1489707022955566e-05, + "loss": 0.2684, + "step": 18851, + "teacher_loss": 0.2238641381263733 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.21738111972808838, + "learning_rate": 1.148749888175496e-05, + "loss": 0.1561, + "step": 18852, + "teacher_loss": 0.14927466213703156 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.6501132249832153, + "learning_rate": 1.1485290821085468e-05, + "loss": 0.2127, + "step": 18853, + "teacher_loss": 0.16410604119300842 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.49390333890914917, + "learning_rate": 1.1483082840997725e-05, + "loss": 0.3164, + "step": 18854, + "teacher_loss": 0.2966894805431366 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.4720699191093445, + "learning_rate": 1.1480874941542347e-05, + "loss": 0.266, + "step": 18855, + "teacher_loss": 0.24312396347522736 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.287192702293396, + "learning_rate": 1.1478667122769954e-05, + "loss": 0.2206, + "step": 18856, + "teacher_loss": 0.21320399641990662 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.3090425133705139, + "learning_rate": 1.1476459384731168e-05, + "loss": 0.1725, + "step": 18857, + "teacher_loss": 0.15732666850090027 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.2239268720149994, + "learning_rate": 1.1474251727476604e-05, + "loss": 0.133, + "step": 18858, + "teacher_loss": 0.12293249368667603 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.45105957984924316, + "learning_rate": 1.1472044151056873e-05, + "loss": 0.2648, + "step": 18859, + "teacher_loss": 0.24412043392658234 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 1.0731204748153687, + "learning_rate": 1.14698366555226e-05, + "loss": 0.4085, + "step": 18860, + "teacher_loss": 0.3346370458602905 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.47917550802230835, + "learning_rate": 1.1467629240924388e-05, + "loss": 0.247, + "step": 18861, + "teacher_loss": 0.22118344902992249 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.7350554466247559, + "learning_rate": 1.1465421907312836e-05, + "loss": 0.2628, + "step": 18862, + "teacher_loss": 0.21031107008457184 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.5233379602432251, + "learning_rate": 1.1463214654738574e-05, + "loss": 0.3276, + "step": 18863, + "teacher_loss": 0.3058304786682129 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.3252178430557251, + "learning_rate": 1.1461007483252193e-05, + "loss": 0.2134, + "step": 18864, + "teacher_loss": 0.20097842812538147 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.2454577088356018, + "learning_rate": 1.1458800392904301e-05, + "loss": 0.2453, + "step": 18865, + "teacher_loss": 0.24530914425849915 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.46905744075775146, + "learning_rate": 1.1456593383745494e-05, + "loss": 0.2399, + "step": 18866, + "teacher_loss": 0.21445605158805847 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.2620818316936493, + "learning_rate": 1.1454386455826381e-05, + "loss": 0.1598, + "step": 18867, + "teacher_loss": 0.14843472838401794 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.20799601078033447, + "learning_rate": 1.1452179609197555e-05, + "loss": 0.2535, + "step": 18868, + "teacher_loss": 0.25855886936187744 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.3932074010372162, + "learning_rate": 1.1449972843909612e-05, + "loss": 0.2104, + "step": 18869, + "teacher_loss": 0.19013071060180664 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.43170738220214844, + "learning_rate": 1.1447766160013151e-05, + "loss": 0.2089, + "step": 18870, + "teacher_loss": 0.18413732945919037 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.4197104871273041, + "learning_rate": 1.1445559557558762e-05, + "loss": 0.26, + "step": 18871, + "teacher_loss": 0.24226155877113342 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.4333374500274658, + "learning_rate": 1.1443353036597032e-05, + "loss": 0.213, + "step": 18872, + "teacher_loss": 0.18848282098770142 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.8702367544174194, + "learning_rate": 1.1441146597178557e-05, + "loss": 0.3089, + "step": 18873, + "teacher_loss": 0.24652233719825745 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.4325931966304779, + "learning_rate": 1.1438940239353924e-05, + "loss": 0.3264, + "step": 18874, + "teacher_loss": 0.31460779905319214 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.22639372944831848, + "learning_rate": 1.1436733963173704e-05, + "loss": 0.2599, + "step": 18875, + "teacher_loss": 0.26357021927833557 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.5459451675415039, + "learning_rate": 1.1434527768688503e-05, + "loss": 0.3696, + "step": 18876, + "teacher_loss": 0.3500426411628723 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.9373420476913452, + "learning_rate": 1.1432321655948885e-05, + "loss": 0.2586, + "step": 18877, + "teacher_loss": 0.18318231403827667 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.6682285070419312, + "learning_rate": 1.143011562500543e-05, + "loss": 0.2774, + "step": 18878, + "teacher_loss": 0.2339474856853485 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.3376731872558594, + "learning_rate": 1.1427909675908728e-05, + "loss": 0.2713, + "step": 18879, + "teacher_loss": 0.26393935084342957 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.5625085234642029, + "learning_rate": 1.1425703808709344e-05, + "loss": 0.2991, + "step": 18880, + "teacher_loss": 0.2698010206222534 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.27605465054512024, + "learning_rate": 1.1423498023457855e-05, + "loss": 0.1733, + "step": 18881, + "teacher_loss": 0.16189351677894592 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.27516597509384155, + "learning_rate": 1.1421292320204836e-05, + "loss": 0.2262, + "step": 18882, + "teacher_loss": 0.22075891494750977 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.3369408845901489, + "learning_rate": 1.1419086699000854e-05, + "loss": 0.2466, + "step": 18883, + "teacher_loss": 0.23659370839595795 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.48085057735443115, + "learning_rate": 1.1416881159896473e-05, + "loss": 0.221, + "step": 18884, + "teacher_loss": 0.19208906590938568 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.19140680134296417, + "learning_rate": 1.141467570294227e-05, + "loss": 0.2049, + "step": 18885, + "teacher_loss": 0.20636822283267975 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.5554202198982239, + "learning_rate": 1.1412470328188807e-05, + "loss": 0.2237, + "step": 18886, + "teacher_loss": 0.18686646223068237 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.4390782117843628, + "learning_rate": 1.1410265035686639e-05, + "loss": 0.3179, + "step": 18887, + "teacher_loss": 0.3044508695602417 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.652686357498169, + "learning_rate": 1.1408059825486324e-05, + "loss": 0.3145, + "step": 18888, + "teacher_loss": 0.2768961489200592 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.47595202922821045, + "learning_rate": 1.1405854697638439e-05, + "loss": 0.1954, + "step": 18889, + "teacher_loss": 0.16426897048950195 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.8647781610488892, + "learning_rate": 1.1403649652193526e-05, + "loss": 0.6722, + "step": 18890, + "teacher_loss": 0.6508535146713257 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.46104782819747925, + "learning_rate": 1.140144468920214e-05, + "loss": 0.3176, + "step": 18891, + "teacher_loss": 0.30160611867904663 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.4557260274887085, + "learning_rate": 1.1399239808714843e-05, + "loss": 0.2449, + "step": 18892, + "teacher_loss": 0.22148439288139343 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.4610227644443512, + "learning_rate": 1.139703501078218e-05, + "loss": 0.1951, + "step": 18893, + "teacher_loss": 0.16556695103645325 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.4284716248512268, + "learning_rate": 1.1394830295454701e-05, + "loss": 0.228, + "step": 18894, + "teacher_loss": 0.20577165484428406 + }, + { + "compression_loss": 0.0, + "epoch": 3.41, + "label_loss": 0.17895889282226562, + "learning_rate": 1.1392625662782958e-05, + "loss": 0.1523, + "step": 18895, + "teacher_loss": 0.14929591119289398 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.4584330916404724, + "learning_rate": 1.139042111281749e-05, + "loss": 0.2571, + "step": 18896, + "teacher_loss": 0.2346741408109665 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.5824363827705383, + "learning_rate": 1.1388216645608842e-05, + "loss": 0.2321, + "step": 18897, + "teacher_loss": 0.19319449365139008 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.35860949754714966, + "learning_rate": 1.1386012261207561e-05, + "loss": 0.1794, + "step": 18898, + "teacher_loss": 0.1595376580953598 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.29824715852737427, + "learning_rate": 1.1383807959664189e-05, + "loss": 0.2217, + "step": 18899, + "teacher_loss": 0.2132459282875061 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.807745099067688, + "learning_rate": 1.1381603741029247e-05, + "loss": 0.2959, + "step": 18900, + "teacher_loss": 0.23897285759449005 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.20956961810588837, + "learning_rate": 1.137939960535329e-05, + "loss": 0.2259, + "step": 18901, + "teacher_loss": 0.22776854038238525 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.6507896184921265, + "learning_rate": 1.1377195552686845e-05, + "loss": 0.3497, + "step": 18902, + "teacher_loss": 0.316256046295166 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.38403183221817017, + "learning_rate": 1.1374991583080441e-05, + "loss": 0.1905, + "step": 18903, + "teacher_loss": 0.1689785271883011 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.3045749366283417, + "learning_rate": 1.1372787696584612e-05, + "loss": 0.2466, + "step": 18904, + "teacher_loss": 0.24017807841300964 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.40457355976104736, + "learning_rate": 1.1370583893249888e-05, + "loss": 0.1895, + "step": 18905, + "teacher_loss": 0.16555386781692505 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.3533029556274414, + "learning_rate": 1.1368380173126793e-05, + "loss": 0.1632, + "step": 18906, + "teacher_loss": 0.14205724000930786 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.49052295088768005, + "learning_rate": 1.1366176536265852e-05, + "loss": 0.2564, + "step": 18907, + "teacher_loss": 0.23042628169059753 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.4184781610965729, + "learning_rate": 1.1363972982717588e-05, + "loss": 0.211, + "step": 18908, + "teacher_loss": 0.18789513409137726 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.37580859661102295, + "learning_rate": 1.1361769512532529e-05, + "loss": 0.2091, + "step": 18909, + "teacher_loss": 0.19052281975746155 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.6779211163520813, + "learning_rate": 1.1359566125761173e-05, + "loss": 0.2784, + "step": 18910, + "teacher_loss": 0.23397746682167053 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.36147624254226685, + "learning_rate": 1.1357362822454062e-05, + "loss": 0.2438, + "step": 18911, + "teacher_loss": 0.23074783384799957 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.23682788014411926, + "learning_rate": 1.13551596026617e-05, + "loss": 0.19, + "step": 18912, + "teacher_loss": 0.18476463854312897 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.35913145542144775, + "learning_rate": 1.1352956466434592e-05, + "loss": 0.188, + "step": 18913, + "teacher_loss": 0.16894227266311646 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.5400506854057312, + "learning_rate": 1.1350753413823269e-05, + "loss": 0.295, + "step": 18914, + "teacher_loss": 0.26782697439193726 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.1977507621049881, + "learning_rate": 1.1348550444878224e-05, + "loss": 0.1735, + "step": 18915, + "teacher_loss": 0.1708141267299652 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.2568507194519043, + "learning_rate": 1.1346347559649966e-05, + "loss": 0.2768, + "step": 18916, + "teacher_loss": 0.2790156602859497 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.6909016370773315, + "learning_rate": 1.1344144758189009e-05, + "loss": 0.2409, + "step": 18917, + "teacher_loss": 0.19092166423797607 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.7165605425834656, + "learning_rate": 1.134194204054585e-05, + "loss": 0.3382, + "step": 18918, + "teacher_loss": 0.2961709499359131 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.577447235584259, + "learning_rate": 1.133973940677099e-05, + "loss": 0.2308, + "step": 18919, + "teacher_loss": 0.192308709025383 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.2538950443267822, + "learning_rate": 1.1337536856914938e-05, + "loss": 0.196, + "step": 18920, + "teacher_loss": 0.18960469961166382 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.7067152261734009, + "learning_rate": 1.1335334391028182e-05, + "loss": 0.25, + "step": 18921, + "teacher_loss": 0.19928410649299622 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.7966921329498291, + "learning_rate": 1.1333132009161218e-05, + "loss": 0.3498, + "step": 18922, + "teacher_loss": 0.3001983165740967 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.7410800457000732, + "learning_rate": 1.1330929711364547e-05, + "loss": 0.2545, + "step": 18923, + "teacher_loss": 0.2004774808883667 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.6805134415626526, + "learning_rate": 1.1328727497688663e-05, + "loss": 0.2516, + "step": 18924, + "teacher_loss": 0.20390653610229492 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.7523657083511353, + "learning_rate": 1.1326525368184037e-05, + "loss": 0.31, + "step": 18925, + "teacher_loss": 0.260883092880249 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.23981522023677826, + "learning_rate": 1.1324323322901181e-05, + "loss": 0.1549, + "step": 18926, + "teacher_loss": 0.14540995657444 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.506084144115448, + "learning_rate": 1.132212136189057e-05, + "loss": 0.2565, + "step": 18927, + "teacher_loss": 0.228807270526886 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 1.0514582395553589, + "learning_rate": 1.1319919485202687e-05, + "loss": 0.3752, + "step": 18928, + "teacher_loss": 0.3000204861164093 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.39114874601364136, + "learning_rate": 1.1317717692888014e-05, + "loss": 0.2489, + "step": 18929, + "teacher_loss": 0.23314209282398224 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.46142902970314026, + "learning_rate": 1.1315515984997038e-05, + "loss": 0.3135, + "step": 18930, + "teacher_loss": 0.29708659648895264 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.5988812446594238, + "learning_rate": 1.1313314361580234e-05, + "loss": 0.2301, + "step": 18931, + "teacher_loss": 0.1890873908996582 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.4074750542640686, + "learning_rate": 1.1311112822688074e-05, + "loss": 0.1883, + "step": 18932, + "teacher_loss": 0.16400037705898285 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.41760775446891785, + "learning_rate": 1.130891136837104e-05, + "loss": 0.2281, + "step": 18933, + "teacher_loss": 0.2070143222808838 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.6495864391326904, + "learning_rate": 1.1306709998679606e-05, + "loss": 0.263, + "step": 18934, + "teacher_loss": 0.22009284794330597 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.3988271653652191, + "learning_rate": 1.130450871366423e-05, + "loss": 0.2261, + "step": 18935, + "teacher_loss": 0.20685747265815735 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.4406070113182068, + "learning_rate": 1.1302307513375398e-05, + "loss": 0.189, + "step": 18936, + "teacher_loss": 0.16103070974349976 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.34024372696876526, + "learning_rate": 1.1300106397863566e-05, + "loss": 0.1437, + "step": 18937, + "teacher_loss": 0.12190777063369751 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.501099705696106, + "learning_rate": 1.1297905367179194e-05, + "loss": 0.2107, + "step": 18938, + "teacher_loss": 0.17847856879234314 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.4490947723388672, + "learning_rate": 1.1295704421372762e-05, + "loss": 0.2429, + "step": 18939, + "teacher_loss": 0.21994373202323914 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.2901543378829956, + "learning_rate": 1.129350356049472e-05, + "loss": 0.1946, + "step": 18940, + "teacher_loss": 0.18396006524562836 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.36750486493110657, + "learning_rate": 1.1291302784595525e-05, + "loss": 0.2405, + "step": 18941, + "teacher_loss": 0.22644253075122833 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.7167357206344604, + "learning_rate": 1.128910209372564e-05, + "loss": 0.2832, + "step": 18942, + "teacher_loss": 0.23507775366306305 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.5238017439842224, + "learning_rate": 1.128690148793552e-05, + "loss": 0.1688, + "step": 18943, + "teacher_loss": 0.1293947696685791 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.5010131597518921, + "learning_rate": 1.128470096727561e-05, + "loss": 0.2643, + "step": 18944, + "teacher_loss": 0.23796433210372925 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.43500784039497375, + "learning_rate": 1.1282500531796374e-05, + "loss": 0.188, + "step": 18945, + "teacher_loss": 0.16054050624370575 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.312985360622406, + "learning_rate": 1.1280300181548254e-05, + "loss": 0.205, + "step": 18946, + "teacher_loss": 0.19297286868095398 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.2814686894416809, + "learning_rate": 1.1278099916581696e-05, + "loss": 0.1815, + "step": 18947, + "teacher_loss": 0.17043611407279968 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.457425981760025, + "learning_rate": 1.127589973694715e-05, + "loss": 0.2391, + "step": 18948, + "teacher_loss": 0.21484504640102386 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.27101561427116394, + "learning_rate": 1.1273699642695062e-05, + "loss": 0.1778, + "step": 18949, + "teacher_loss": 0.16740021109580994 + }, + { + "compression_loss": 0.0, + "epoch": 3.42, + "label_loss": 0.5963525772094727, + "learning_rate": 1.1271499633875865e-05, + "loss": 0.316, + "step": 18950, + "teacher_loss": 0.28483080863952637 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.29575690627098083, + "learning_rate": 1.1269299710540001e-05, + "loss": 0.2006, + "step": 18951, + "teacher_loss": 0.1899731457233429 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.48723265528678894, + "learning_rate": 1.1267099872737912e-05, + "loss": 0.2149, + "step": 18952, + "teacher_loss": 0.18462856113910675 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.39204859733581543, + "learning_rate": 1.126490012052003e-05, + "loss": 0.3879, + "step": 18953, + "teacher_loss": 0.3874187767505646 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.36898645758628845, + "learning_rate": 1.1262700453936788e-05, + "loss": 0.2318, + "step": 18954, + "teacher_loss": 0.21651917695999146 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.5090503096580505, + "learning_rate": 1.1260500873038622e-05, + "loss": 0.2543, + "step": 18955, + "teacher_loss": 0.22600474953651428 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.8451501131057739, + "learning_rate": 1.125830137787596e-05, + "loss": 0.3483, + "step": 18956, + "teacher_loss": 0.2931361198425293 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.3353084921836853, + "learning_rate": 1.1256101968499225e-05, + "loss": 0.2914, + "step": 18957, + "teacher_loss": 0.28654128313064575 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.46174758672714233, + "learning_rate": 1.1253902644958851e-05, + "loss": 0.2772, + "step": 18958, + "teacher_loss": 0.2567267119884491 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.7253978252410889, + "learning_rate": 1.1251703407305262e-05, + "loss": 0.2254, + "step": 18959, + "teacher_loss": 0.16980654001235962 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.7977887392044067, + "learning_rate": 1.1249504255588865e-05, + "loss": 0.2797, + "step": 18960, + "teacher_loss": 0.22212207317352295 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.5071800947189331, + "learning_rate": 1.1247305189860101e-05, + "loss": 0.2219, + "step": 18961, + "teacher_loss": 0.19021233916282654 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.061401158571243286, + "learning_rate": 1.1245106210169374e-05, + "loss": 0.0904, + "step": 18962, + "teacher_loss": 0.09357604384422302 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.29076728224754333, + "learning_rate": 1.12429073165671e-05, + "loss": 0.1559, + "step": 18963, + "teacher_loss": 0.14086532592773438 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.172760009765625, + "learning_rate": 1.1240708509103702e-05, + "loss": 0.1478, + "step": 18964, + "teacher_loss": 0.14499524235725403 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.1276783049106598, + "learning_rate": 1.1238509787829587e-05, + "loss": 0.1806, + "step": 18965, + "teacher_loss": 0.18645638227462769 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.5798298120498657, + "learning_rate": 1.1236311152795162e-05, + "loss": 0.2034, + "step": 18966, + "teacher_loss": 0.16161195933818817 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.2813561260700226, + "learning_rate": 1.1234112604050839e-05, + "loss": 0.2153, + "step": 18967, + "teacher_loss": 0.20792612433433533 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.35899436473846436, + "learning_rate": 1.1231914141647024e-05, + "loss": 0.2199, + "step": 18968, + "teacher_loss": 0.20440340042114258 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.6920740604400635, + "learning_rate": 1.1229715765634122e-05, + "loss": 0.2215, + "step": 18969, + "teacher_loss": 0.16922463476657867 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.9821692109107971, + "learning_rate": 1.122751747606253e-05, + "loss": 0.388, + "step": 18970, + "teacher_loss": 0.3219813406467438 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.3911381959915161, + "learning_rate": 1.1225319272982655e-05, + "loss": 0.2089, + "step": 18971, + "teacher_loss": 0.1886652559041977 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.35358232259750366, + "learning_rate": 1.1223121156444893e-05, + "loss": 0.1922, + "step": 18972, + "teacher_loss": 0.17421412467956543 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.3133712410926819, + "learning_rate": 1.1220923126499632e-05, + "loss": 0.1976, + "step": 18973, + "teacher_loss": 0.184731125831604 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.1710585355758667, + "learning_rate": 1.1218725183197282e-05, + "loss": 0.1833, + "step": 18974, + "teacher_loss": 0.18468737602233887 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.28254732489585876, + "learning_rate": 1.1216527326588222e-05, + "loss": 0.2421, + "step": 18975, + "teacher_loss": 0.23765155673027039 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.24278013408184052, + "learning_rate": 1.1214329556722846e-05, + "loss": 0.1977, + "step": 18976, + "teacher_loss": 0.19265007972717285 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.26353296637535095, + "learning_rate": 1.1212131873651546e-05, + "loss": 0.1768, + "step": 18977, + "teacher_loss": 0.16713783144950867 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.34973233938217163, + "learning_rate": 1.1209934277424705e-05, + "loss": 0.2086, + "step": 18978, + "teacher_loss": 0.19293570518493652 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.6932762861251831, + "learning_rate": 1.1207736768092707e-05, + "loss": 0.3375, + "step": 18979, + "teacher_loss": 0.2980068624019623 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.4084545075893402, + "learning_rate": 1.1205539345705935e-05, + "loss": 0.2911, + "step": 18980, + "teacher_loss": 0.27811479568481445 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.45971405506134033, + "learning_rate": 1.1203342010314772e-05, + "loss": 0.2185, + "step": 18981, + "teacher_loss": 0.19168393313884735 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.7347719669342041, + "learning_rate": 1.1201144761969591e-05, + "loss": 0.2961, + "step": 18982, + "teacher_loss": 0.24735824763774872 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.4427201449871063, + "learning_rate": 1.119894760072077e-05, + "loss": 0.2618, + "step": 18983, + "teacher_loss": 0.24167536199092865 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.27861031889915466, + "learning_rate": 1.1196750526618692e-05, + "loss": 0.2102, + "step": 18984, + "teacher_loss": 0.20259132981300354 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.2403203397989273, + "learning_rate": 1.119455353971371e-05, + "loss": 0.3072, + "step": 18985, + "teacher_loss": 0.31458765268325806 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.29633820056915283, + "learning_rate": 1.1192356640056216e-05, + "loss": 0.1981, + "step": 18986, + "teacher_loss": 0.18721434473991394 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.36018747091293335, + "learning_rate": 1.1190159827696565e-05, + "loss": 0.1918, + "step": 18987, + "teacher_loss": 0.173102468252182 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.6048860549926758, + "learning_rate": 1.118796310268512e-05, + "loss": 0.3652, + "step": 18988, + "teacher_loss": 0.3385535478591919 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.3029094338417053, + "learning_rate": 1.1185766465072262e-05, + "loss": 0.2571, + "step": 18989, + "teacher_loss": 0.2520105838775635 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.26197317242622375, + "learning_rate": 1.1183569914908341e-05, + "loss": 0.2758, + "step": 18990, + "teacher_loss": 0.27731871604919434 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.24724102020263672, + "learning_rate": 1.1181373452243717e-05, + "loss": 0.1365, + "step": 18991, + "teacher_loss": 0.12416236847639084 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.34574952721595764, + "learning_rate": 1.117917707712875e-05, + "loss": 0.2275, + "step": 18992, + "teacher_loss": 0.21431893110275269 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.5497463941574097, + "learning_rate": 1.11769807896138e-05, + "loss": 0.2628, + "step": 18993, + "teacher_loss": 0.23092001676559448 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.5370451211929321, + "learning_rate": 1.1174784589749218e-05, + "loss": 0.2619, + "step": 18994, + "teacher_loss": 0.23129001259803772 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.46238672733306885, + "learning_rate": 1.1172588477585354e-05, + "loss": 0.2769, + "step": 18995, + "teacher_loss": 0.25628677010536194 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.1385607123374939, + "learning_rate": 1.1170392453172563e-05, + "loss": 0.1382, + "step": 18996, + "teacher_loss": 0.13811525702476501 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.2120944857597351, + "learning_rate": 1.1168196516561196e-05, + "loss": 0.2165, + "step": 18997, + "teacher_loss": 0.2170124500989914 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.4757639467716217, + "learning_rate": 1.1166000667801583e-05, + "loss": 0.211, + "step": 18998, + "teacher_loss": 0.18161174654960632 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.4381820261478424, + "learning_rate": 1.1163804906944093e-05, + "loss": 0.2057, + "step": 18999, + "teacher_loss": 0.17984804511070251 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.41334158182144165, + "learning_rate": 1.1161609234039047e-05, + "loss": 0.2713, + "step": 19000, + "teacher_loss": 0.2555544972419739 + }, + { + "epoch": 3.43, + "eval_exact_match": 79.82024597918638, + "eval_f1": 87.46981365235351, + "step": 19000 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.8471626043319702, + "learning_rate": 1.1159413649136793e-05, + "loss": 0.4394, + "step": 19001, + "teacher_loss": 0.39409661293029785 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.38093143701553345, + "learning_rate": 1.1157218152287674e-05, + "loss": 0.1904, + "step": 19002, + "teacher_loss": 0.16921091079711914 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.5689165592193604, + "learning_rate": 1.1155022743542019e-05, + "loss": 0.2834, + "step": 19003, + "teacher_loss": 0.25163358449935913 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.38006454706192017, + "learning_rate": 1.115282742295016e-05, + "loss": 0.3021, + "step": 19004, + "teacher_loss": 0.29344165325164795 + }, + { + "compression_loss": 0.0, + "epoch": 3.43, + "label_loss": 0.532762885093689, + "learning_rate": 1.115063219056244e-05, + "loss": 0.3338, + "step": 19005, + "teacher_loss": 0.31166601181030273 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.49969178438186646, + "learning_rate": 1.1148437046429182e-05, + "loss": 0.2912, + "step": 19006, + "teacher_loss": 0.2680322527885437 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.2557436227798462, + "learning_rate": 1.1146241990600713e-05, + "loss": 0.1811, + "step": 19007, + "teacher_loss": 0.1728229820728302 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.6715871095657349, + "learning_rate": 1.1144047023127361e-05, + "loss": 0.3755, + "step": 19008, + "teacher_loss": 0.3425886631011963 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.8368261456489563, + "learning_rate": 1.1141852144059455e-05, + "loss": 0.4687, + "step": 19009, + "teacher_loss": 0.42783254384994507 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.5769778490066528, + "learning_rate": 1.113965735344731e-05, + "loss": 0.2561, + "step": 19010, + "teacher_loss": 0.22041678428649902 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.25773853063583374, + "learning_rate": 1.113746265134124e-05, + "loss": 0.2289, + "step": 19011, + "teacher_loss": 0.22568972408771515 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.7073142528533936, + "learning_rate": 1.1135268037791582e-05, + "loss": 0.2422, + "step": 19012, + "teacher_loss": 0.19047322869300842 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.23363101482391357, + "learning_rate": 1.1133073512848635e-05, + "loss": 0.2383, + "step": 19013, + "teacher_loss": 0.2387879192829132 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.18842419981956482, + "learning_rate": 1.1130879076562717e-05, + "loss": 0.1998, + "step": 19014, + "teacher_loss": 0.2010757476091385 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.1398010551929474, + "learning_rate": 1.1128684728984144e-05, + "loss": 0.1573, + "step": 19015, + "teacher_loss": 0.15925747156143188 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.5451032519340515, + "learning_rate": 1.1126490470163224e-05, + "loss": 0.3273, + "step": 19016, + "teacher_loss": 0.3031374216079712 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.16702450811862946, + "learning_rate": 1.1124296300150264e-05, + "loss": 0.1769, + "step": 19017, + "teacher_loss": 0.1780245304107666 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.37351179122924805, + "learning_rate": 1.112210221899557e-05, + "loss": 0.1987, + "step": 19018, + "teacher_loss": 0.17925170063972473 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.7907315492630005, + "learning_rate": 1.1119908226749445e-05, + "loss": 0.2356, + "step": 19019, + "teacher_loss": 0.1739162802696228 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.44762319326400757, + "learning_rate": 1.1117714323462188e-05, + "loss": 0.2861, + "step": 19020, + "teacher_loss": 0.2681659460067749 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.234996035695076, + "learning_rate": 1.1115520509184105e-05, + "loss": 0.2535, + "step": 19021, + "teacher_loss": 0.25557753443717957 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.17143544554710388, + "learning_rate": 1.1113326783965497e-05, + "loss": 0.1636, + "step": 19022, + "teacher_loss": 0.16274486482143402 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.4388921856880188, + "learning_rate": 1.111113314785664e-05, + "loss": 0.2156, + "step": 19023, + "teacher_loss": 0.19080013036727905 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.28796571493148804, + "learning_rate": 1.110893960090785e-05, + "loss": 0.1631, + "step": 19024, + "teacher_loss": 0.1491818130016327 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.41557204723358154, + "learning_rate": 1.1106746143169406e-05, + "loss": 0.2497, + "step": 19025, + "teacher_loss": 0.2313135266304016 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.42646127939224243, + "learning_rate": 1.1104552774691598e-05, + "loss": 0.2244, + "step": 19026, + "teacher_loss": 0.2019994854927063 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.22570274770259857, + "learning_rate": 1.1102359495524718e-05, + "loss": 0.1462, + "step": 19027, + "teacher_loss": 0.13731706142425537 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.5449619293212891, + "learning_rate": 1.110016630571905e-05, + "loss": 0.2715, + "step": 19028, + "teacher_loss": 0.24111050367355347 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.676845908164978, + "learning_rate": 1.1097973205324875e-05, + "loss": 0.3617, + "step": 19029, + "teacher_loss": 0.3266303539276123 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.5150049924850464, + "learning_rate": 1.1095780194392476e-05, + "loss": 0.2362, + "step": 19030, + "teacher_loss": 0.20525969564914703 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.32100632786750793, + "learning_rate": 1.1093587272972132e-05, + "loss": 0.171, + "step": 19031, + "teacher_loss": 0.15431350469589233 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.14585193991661072, + "learning_rate": 1.1091394441114122e-05, + "loss": 0.1191, + "step": 19032, + "teacher_loss": 0.11615870893001556 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.4267537593841553, + "learning_rate": 1.1089201698868712e-05, + "loss": 0.2493, + "step": 19033, + "teacher_loss": 0.22954407334327698 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.6894004344940186, + "learning_rate": 1.108700904628619e-05, + "loss": 0.2909, + "step": 19034, + "teacher_loss": 0.2466016709804535 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.3753123879432678, + "learning_rate": 1.1084816483416816e-05, + "loss": 0.185, + "step": 19035, + "teacher_loss": 0.1638604998588562 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.3636763393878937, + "learning_rate": 1.1082624010310856e-05, + "loss": 0.2218, + "step": 19036, + "teacher_loss": 0.20608311891555786 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.14077259600162506, + "learning_rate": 1.1080431627018588e-05, + "loss": 0.2015, + "step": 19037, + "teacher_loss": 0.20823225378990173 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.15060411393642426, + "learning_rate": 1.1078239333590272e-05, + "loss": 0.1453, + "step": 19038, + "teacher_loss": 0.14470574259757996 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.29928404092788696, + "learning_rate": 1.1076047130076169e-05, + "loss": 0.1936, + "step": 19039, + "teacher_loss": 0.18181224167346954 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.6164253950119019, + "learning_rate": 1.1073855016526539e-05, + "loss": 0.2601, + "step": 19040, + "teacher_loss": 0.22046592831611633 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.477509081363678, + "learning_rate": 1.1071662992991644e-05, + "loss": 0.2402, + "step": 19041, + "teacher_loss": 0.21382513642311096 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.6430342793464661, + "learning_rate": 1.1069471059521736e-05, + "loss": 0.2444, + "step": 19042, + "teacher_loss": 0.2001233696937561 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.397210955619812, + "learning_rate": 1.1067279216167075e-05, + "loss": 0.2302, + "step": 19043, + "teacher_loss": 0.21159601211547852 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.973497748374939, + "learning_rate": 1.106508746297791e-05, + "loss": 0.3607, + "step": 19044, + "teacher_loss": 0.29262202978134155 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.4283132553100586, + "learning_rate": 1.106289580000449e-05, + "loss": 0.3044, + "step": 19045, + "teacher_loss": 0.29058772325515747 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.2698388695716858, + "learning_rate": 1.1060704227297068e-05, + "loss": 0.1711, + "step": 19046, + "teacher_loss": 0.160079687833786 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.3356991112232208, + "learning_rate": 1.1058512744905892e-05, + "loss": 0.3469, + "step": 19047, + "teacher_loss": 0.3481284976005554 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.16967880725860596, + "learning_rate": 1.105632135288119e-05, + "loss": 0.1508, + "step": 19048, + "teacher_loss": 0.1486629843711853 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.38393399119377136, + "learning_rate": 1.1054130051273225e-05, + "loss": 0.1983, + "step": 19049, + "teacher_loss": 0.177656888961792 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.2889882028102875, + "learning_rate": 1.1051938840132226e-05, + "loss": 0.1523, + "step": 19050, + "teacher_loss": 0.13713288307189941 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.9815045595169067, + "learning_rate": 1.1049747719508431e-05, + "loss": 0.9973, + "step": 19051, + "teacher_loss": 0.9990469217300415 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.3875212073326111, + "learning_rate": 1.1047556689452077e-05, + "loss": 0.191, + "step": 19052, + "teacher_loss": 0.1692069172859192 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.3760157525539398, + "learning_rate": 1.1045365750013399e-05, + "loss": 0.2184, + "step": 19053, + "teacher_loss": 0.20087511837482452 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.714532732963562, + "learning_rate": 1.1043174901242629e-05, + "loss": 0.2904, + "step": 19054, + "teacher_loss": 0.2433064877986908 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.4075005054473877, + "learning_rate": 1.104098414318999e-05, + "loss": 0.2382, + "step": 19055, + "teacher_loss": 0.2193678617477417 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.26949581503868103, + "learning_rate": 1.1038793475905722e-05, + "loss": 0.2626, + "step": 19056, + "teacher_loss": 0.2618448734283447 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.7603981494903564, + "learning_rate": 1.1036602899440042e-05, + "loss": 0.2307, + "step": 19057, + "teacher_loss": 0.17186513543128967 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.22194135189056396, + "learning_rate": 1.1034412413843166e-05, + "loss": 0.2133, + "step": 19058, + "teacher_loss": 0.21234898269176483 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.49292635917663574, + "learning_rate": 1.1032222019165334e-05, + "loss": 0.2762, + "step": 19059, + "teacher_loss": 0.25212591886520386 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.22617828845977783, + "learning_rate": 1.1030031715456753e-05, + "loss": 0.2467, + "step": 19060, + "teacher_loss": 0.24898689985275269 + }, + { + "compression_loss": 0.0, + "epoch": 3.44, + "label_loss": 0.498049795627594, + "learning_rate": 1.1027841502767636e-05, + "loss": 0.1907, + "step": 19061, + "teacher_loss": 0.1565786749124527 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.3925136923789978, + "learning_rate": 1.1025651381148212e-05, + "loss": 0.1655, + "step": 19062, + "teacher_loss": 0.14022096991539001 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.4340514540672302, + "learning_rate": 1.1023461350648683e-05, + "loss": 0.3079, + "step": 19063, + "teacher_loss": 0.2939187288284302 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.32648319005966187, + "learning_rate": 1.1021271411319262e-05, + "loss": 0.1963, + "step": 19064, + "teacher_loss": 0.18180987238883972 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.28082332015037537, + "learning_rate": 1.1019081563210158e-05, + "loss": 0.2673, + "step": 19065, + "teacher_loss": 0.26580411195755005 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.5896283984184265, + "learning_rate": 1.1016891806371581e-05, + "loss": 0.2738, + "step": 19066, + "teacher_loss": 0.23867081105709076 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.448432594537735, + "learning_rate": 1.101470214085373e-05, + "loss": 0.2158, + "step": 19067, + "teacher_loss": 0.18999098241329193 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.48557835817337036, + "learning_rate": 1.1012512566706812e-05, + "loss": 0.5893, + "step": 19068, + "teacher_loss": 0.6008109450340271 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.2265733778476715, + "learning_rate": 1.1010323083981028e-05, + "loss": 0.1714, + "step": 19069, + "teacher_loss": 0.16522319614887238 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.4067123830318451, + "learning_rate": 1.1008133692726569e-05, + "loss": 0.252, + "step": 19070, + "teacher_loss": 0.2347760796546936 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.5183794498443604, + "learning_rate": 1.100594439299364e-05, + "loss": 0.3212, + "step": 19071, + "teacher_loss": 0.2993224263191223 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.6018751859664917, + "learning_rate": 1.1003755184832435e-05, + "loss": 0.2658, + "step": 19072, + "teacher_loss": 0.22841951251029968 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.3192692697048187, + "learning_rate": 1.1001566068293138e-05, + "loss": 0.2198, + "step": 19073, + "teacher_loss": 0.20875096321105957 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.5363529920578003, + "learning_rate": 1.0999377043425938e-05, + "loss": 0.337, + "step": 19074, + "teacher_loss": 0.3147992491722107 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.3295190632343292, + "learning_rate": 1.0997188110281034e-05, + "loss": 0.166, + "step": 19075, + "teacher_loss": 0.14782433211803436 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.5258363485336304, + "learning_rate": 1.0994999268908606e-05, + "loss": 0.5233, + "step": 19076, + "teacher_loss": 0.5230728983879089 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.2074512541294098, + "learning_rate": 1.0992810519358833e-05, + "loss": 0.1599, + "step": 19077, + "teacher_loss": 0.15463721752166748 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.7323053479194641, + "learning_rate": 1.0990621861681905e-05, + "loss": 0.2794, + "step": 19078, + "teacher_loss": 0.22904568910598755 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.6553351879119873, + "learning_rate": 1.0988433295927996e-05, + "loss": 0.2806, + "step": 19079, + "teacher_loss": 0.23897188901901245 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.5344595909118652, + "learning_rate": 1.0986244822147279e-05, + "loss": 0.1869, + "step": 19080, + "teacher_loss": 0.1483137607574463 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.6060537695884705, + "learning_rate": 1.098405644038994e-05, + "loss": 0.2669, + "step": 19081, + "teacher_loss": 0.22919301688671112 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.20828035473823547, + "learning_rate": 1.0981868150706148e-05, + "loss": 0.1764, + "step": 19082, + "teacher_loss": 0.17282292246818542 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.4218991994857788, + "learning_rate": 1.0979679953146064e-05, + "loss": 0.1965, + "step": 19083, + "teacher_loss": 0.17145104706287384 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.5270686149597168, + "learning_rate": 1.0977491847759874e-05, + "loss": 0.2291, + "step": 19084, + "teacher_loss": 0.19599170982837677 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.45287320017814636, + "learning_rate": 1.0975303834597734e-05, + "loss": 0.1917, + "step": 19085, + "teacher_loss": 0.16270305216312408 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.6269698739051819, + "learning_rate": 1.0973115913709801e-05, + "loss": 0.2743, + "step": 19086, + "teacher_loss": 0.23513120412826538 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.6099036335945129, + "learning_rate": 1.097092808514626e-05, + "loss": 0.2866, + "step": 19087, + "teacher_loss": 0.25071215629577637 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.48839032649993896, + "learning_rate": 1.0968740348957252e-05, + "loss": 0.338, + "step": 19088, + "teacher_loss": 0.32131969928741455 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.8912617564201355, + "learning_rate": 1.0966552705192941e-05, + "loss": 0.2501, + "step": 19089, + "teacher_loss": 0.17885440587997437 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.6618897318840027, + "learning_rate": 1.0964365153903487e-05, + "loss": 0.233, + "step": 19090, + "teacher_loss": 0.18530690670013428 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.20693433284759521, + "learning_rate": 1.0962177695139039e-05, + "loss": 0.1647, + "step": 19091, + "teacher_loss": 0.160008043050766 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.5440467000007629, + "learning_rate": 1.0959990328949746e-05, + "loss": 0.2016, + "step": 19092, + "teacher_loss": 0.16355106234550476 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.580567479133606, + "learning_rate": 1.0957803055385765e-05, + "loss": 0.3179, + "step": 19093, + "teacher_loss": 0.2887495458126068 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.21344566345214844, + "learning_rate": 1.0955615874497243e-05, + "loss": 0.1541, + "step": 19094, + "teacher_loss": 0.14745765924453735 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.2570154666900635, + "learning_rate": 1.0953428786334326e-05, + "loss": 0.2103, + "step": 19095, + "teacher_loss": 0.20507436990737915 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.5489175915718079, + "learning_rate": 1.0951241790947145e-05, + "loss": 0.2582, + "step": 19096, + "teacher_loss": 0.22592632472515106 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.8758103847503662, + "learning_rate": 1.0949054888385862e-05, + "loss": 0.367, + "step": 19097, + "teacher_loss": 0.31043192744255066 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.5319894552230835, + "learning_rate": 1.0946868078700599e-05, + "loss": 0.2324, + "step": 19098, + "teacher_loss": 0.19914206862449646 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 1.2440495491027832, + "learning_rate": 1.0944681361941498e-05, + "loss": 0.3193, + "step": 19099, + "teacher_loss": 0.21660488843917847 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.787350058555603, + "learning_rate": 1.0942494738158698e-05, + "loss": 0.3602, + "step": 19100, + "teacher_loss": 0.3127365708351135 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.14992976188659668, + "learning_rate": 1.0940308207402327e-05, + "loss": 0.2129, + "step": 19101, + "teacher_loss": 0.21990343928337097 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.48523831367492676, + "learning_rate": 1.0938121769722517e-05, + "loss": 0.2114, + "step": 19102, + "teacher_loss": 0.1809995174407959 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.48777472972869873, + "learning_rate": 1.0935935425169396e-05, + "loss": 0.2081, + "step": 19103, + "teacher_loss": 0.17698656022548676 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.8260766863822937, + "learning_rate": 1.0933749173793094e-05, + "loss": 0.267, + "step": 19104, + "teacher_loss": 0.20483244955539703 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.427012175321579, + "learning_rate": 1.0931563015643727e-05, + "loss": 0.211, + "step": 19105, + "teacher_loss": 0.1870269775390625 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.45242053270339966, + "learning_rate": 1.0929376950771425e-05, + "loss": 0.296, + "step": 19106, + "teacher_loss": 0.2785981297492981 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.3336101174354553, + "learning_rate": 1.092719097922631e-05, + "loss": 0.1833, + "step": 19107, + "teacher_loss": 0.16663047671318054 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.3879373073577881, + "learning_rate": 1.0925005101058484e-05, + "loss": 0.3601, + "step": 19108, + "teacher_loss": 0.3569643497467041 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.3112136125564575, + "learning_rate": 1.0922819316318086e-05, + "loss": 0.1345, + "step": 19109, + "teacher_loss": 0.11481940746307373 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.367595911026001, + "learning_rate": 1.0920633625055213e-05, + "loss": 0.2732, + "step": 19110, + "teacher_loss": 0.2627606987953186 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.24126440286636353, + "learning_rate": 1.0918448027319972e-05, + "loss": 0.1679, + "step": 19111, + "teacher_loss": 0.15977492928504944 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.5351781845092773, + "learning_rate": 1.0916262523162492e-05, + "loss": 0.2342, + "step": 19112, + "teacher_loss": 0.20072713494300842 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.22612321376800537, + "learning_rate": 1.0914077112632864e-05, + "loss": 0.2108, + "step": 19113, + "teacher_loss": 0.2091098427772522 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.27265116572380066, + "learning_rate": 1.09118917957812e-05, + "loss": 0.1813, + "step": 19114, + "teacher_loss": 0.1711946427822113 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.507906436920166, + "learning_rate": 1.0909706572657594e-05, + "loss": 0.2572, + "step": 19115, + "teacher_loss": 0.22938573360443115 + }, + { + "compression_loss": 0.0, + "epoch": 3.45, + "label_loss": 0.7333556413650513, + "learning_rate": 1.0907521443312158e-05, + "loss": 0.2457, + "step": 19116, + "teacher_loss": 0.19156251847743988 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.6207568049430847, + "learning_rate": 1.0905336407794985e-05, + "loss": 0.406, + "step": 19117, + "teacher_loss": 0.3821568489074707 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.3077746331691742, + "learning_rate": 1.090315146615617e-05, + "loss": 0.2525, + "step": 19118, + "teacher_loss": 0.2464039921760559 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.26714107394218445, + "learning_rate": 1.0900966618445807e-05, + "loss": 0.1751, + "step": 19119, + "teacher_loss": 0.1648259311914444 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.4723213016986847, + "learning_rate": 1.0898781864713999e-05, + "loss": 0.4401, + "step": 19120, + "teacher_loss": 0.43646758794784546 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.8825986385345459, + "learning_rate": 1.0896597205010815e-05, + "loss": 0.5143, + "step": 19121, + "teacher_loss": 0.4734327793121338 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.4030412435531616, + "learning_rate": 1.0894412639386362e-05, + "loss": 0.2868, + "step": 19122, + "teacher_loss": 0.27388477325439453 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.17659726738929749, + "learning_rate": 1.0892228167890718e-05, + "loss": 0.1779, + "step": 19123, + "teacher_loss": 0.1780160367488861 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.19377273321151733, + "learning_rate": 1.0890043790573961e-05, + "loss": 0.2127, + "step": 19124, + "teacher_loss": 0.21482019126415253 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.45831024646759033, + "learning_rate": 1.0887859507486183e-05, + "loss": 0.2027, + "step": 19125, + "teacher_loss": 0.1743345558643341 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.4269407391548157, + "learning_rate": 1.0885675318677456e-05, + "loss": 0.2013, + "step": 19126, + "teacher_loss": 0.17623060941696167 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.45461559295654297, + "learning_rate": 1.0883491224197856e-05, + "loss": 0.245, + "step": 19127, + "teacher_loss": 0.22169965505599976 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.4056253433227539, + "learning_rate": 1.0881307224097463e-05, + "loss": 0.2319, + "step": 19128, + "teacher_loss": 0.21257199347019196 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.2653721570968628, + "learning_rate": 1.0879123318426346e-05, + "loss": 0.243, + "step": 19129, + "teacher_loss": 0.24049286544322968 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.4330694079399109, + "learning_rate": 1.0876939507234575e-05, + "loss": 0.2731, + "step": 19130, + "teacher_loss": 0.2552933692932129 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.5745980143547058, + "learning_rate": 1.0874755790572221e-05, + "loss": 0.2519, + "step": 19131, + "teacher_loss": 0.21599452197551727 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.5256795883178711, + "learning_rate": 1.0872572168489353e-05, + "loss": 0.1883, + "step": 19132, + "teacher_loss": 0.15077659487724304 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.4138350784778595, + "learning_rate": 1.0870388641036023e-05, + "loss": 0.2443, + "step": 19133, + "teacher_loss": 0.22547096014022827 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.13589030504226685, + "learning_rate": 1.0868205208262302e-05, + "loss": 0.1576, + "step": 19134, + "teacher_loss": 0.16004526615142822 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.5820252895355225, + "learning_rate": 1.0866021870218253e-05, + "loss": 0.2479, + "step": 19135, + "teacher_loss": 0.21080715954303741 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.9297947883605957, + "learning_rate": 1.0863838626953924e-05, + "loss": 0.5392, + "step": 19136, + "teacher_loss": 0.4958457946777344 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.3560451567173004, + "learning_rate": 1.0861655478519375e-05, + "loss": 0.2492, + "step": 19137, + "teacher_loss": 0.2373504936695099 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.3579670488834381, + "learning_rate": 1.0859472424964658e-05, + "loss": 0.2291, + "step": 19138, + "teacher_loss": 0.21473124623298645 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.2981533408164978, + "learning_rate": 1.0857289466339825e-05, + "loss": 0.1637, + "step": 19139, + "teacher_loss": 0.14874543249607086 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.3355322480201721, + "learning_rate": 1.0855106602694922e-05, + "loss": 0.1803, + "step": 19140, + "teacher_loss": 0.16306746006011963 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.5337064266204834, + "learning_rate": 1.085292383408e-05, + "loss": 0.2835, + "step": 19141, + "teacher_loss": 0.2556573152542114 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.3092346787452698, + "learning_rate": 1.0850741160545102e-05, + "loss": 0.1575, + "step": 19142, + "teacher_loss": 0.1406756341457367 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.8014358878135681, + "learning_rate": 1.0848558582140264e-05, + "loss": 0.2952, + "step": 19143, + "teacher_loss": 0.23898519575595856 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.29980674386024475, + "learning_rate": 1.0846376098915536e-05, + "loss": 0.2679, + "step": 19144, + "teacher_loss": 0.26433664560317993 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.3592222034931183, + "learning_rate": 1.0844193710920952e-05, + "loss": 0.2944, + "step": 19145, + "teacher_loss": 0.28720492124557495 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.6348129510879517, + "learning_rate": 1.0842011418206538e-05, + "loss": 0.4556, + "step": 19146, + "teacher_loss": 0.43573644757270813 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.598501443862915, + "learning_rate": 1.0839829220822347e-05, + "loss": 0.2475, + "step": 19147, + "teacher_loss": 0.2084900289773941 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.546212911605835, + "learning_rate": 1.0837647118818393e-05, + "loss": 0.3756, + "step": 19148, + "teacher_loss": 0.3565971851348877 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.478501558303833, + "learning_rate": 1.0835465112244709e-05, + "loss": 0.3856, + "step": 19149, + "teacher_loss": 0.37531256675720215 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.45155566930770874, + "learning_rate": 1.0833283201151328e-05, + "loss": 0.3665, + "step": 19150, + "teacher_loss": 0.3570408225059509 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.163311168551445, + "learning_rate": 1.0831101385588271e-05, + "loss": 0.1857, + "step": 19151, + "teacher_loss": 0.18814617395401 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.3137606382369995, + "learning_rate": 1.0828919665605557e-05, + "loss": 0.1885, + "step": 19152, + "teacher_loss": 0.17458641529083252 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.37643373012542725, + "learning_rate": 1.0826738041253211e-05, + "loss": 0.2209, + "step": 19153, + "teacher_loss": 0.20356819033622742 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.8409779071807861, + "learning_rate": 1.0824556512581252e-05, + "loss": 0.3443, + "step": 19154, + "teacher_loss": 0.28911924362182617 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.8939721584320068, + "learning_rate": 1.0822375079639688e-05, + "loss": 0.2987, + "step": 19155, + "teacher_loss": 0.23253269493579865 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.2531440854072571, + "learning_rate": 1.0820193742478542e-05, + "loss": 0.1753, + "step": 19156, + "teacher_loss": 0.1666060984134674 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.5972980856895447, + "learning_rate": 1.0818012501147824e-05, + "loss": 0.2334, + "step": 19157, + "teacher_loss": 0.19298431277275085 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.3534064292907715, + "learning_rate": 1.0815831355697541e-05, + "loss": 0.2343, + "step": 19158, + "teacher_loss": 0.2210364192724228 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.34636884927749634, + "learning_rate": 1.0813650306177693e-05, + "loss": 0.2036, + "step": 19159, + "teacher_loss": 0.18772061169147491 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.32011911273002625, + "learning_rate": 1.08114693526383e-05, + "loss": 0.1816, + "step": 19160, + "teacher_loss": 0.16624769568443298 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.260425329208374, + "learning_rate": 1.0809288495129354e-05, + "loss": 0.2165, + "step": 19161, + "teacher_loss": 0.21158911287784576 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.5655578374862671, + "learning_rate": 1.0807107733700856e-05, + "loss": 0.2962, + "step": 19162, + "teacher_loss": 0.2663109004497528 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.18860939145088196, + "learning_rate": 1.080492706840281e-05, + "loss": 0.161, + "step": 19163, + "teacher_loss": 0.15794195234775543 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.12444545328617096, + "learning_rate": 1.080274649928521e-05, + "loss": 0.1328, + "step": 19164, + "teacher_loss": 0.13371118903160095 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.4685020446777344, + "learning_rate": 1.0800566026398044e-05, + "loss": 0.2251, + "step": 19165, + "teacher_loss": 0.1981002688407898 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.371686190366745, + "learning_rate": 1.079838564979131e-05, + "loss": 0.228, + "step": 19166, + "teacher_loss": 0.21199540793895721 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.17105284333229065, + "learning_rate": 1.0796205369515e-05, + "loss": 0.1771, + "step": 19167, + "teacher_loss": 0.17781619727611542 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.2694578766822815, + "learning_rate": 1.079402518561909e-05, + "loss": 0.2008, + "step": 19168, + "teacher_loss": 0.19312043488025665 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.42173853516578674, + "learning_rate": 1.0791845098153577e-05, + "loss": 0.2454, + "step": 19169, + "teacher_loss": 0.2257966250181198 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.32141679525375366, + "learning_rate": 1.0789665107168445e-05, + "loss": 0.2061, + "step": 19170, + "teacher_loss": 0.19332163035869598 + }, + { + "compression_loss": 0.0, + "epoch": 3.46, + "label_loss": 0.5475047826766968, + "learning_rate": 1.0787485212713656e-05, + "loss": 0.2396, + "step": 19171, + "teacher_loss": 0.20534925162792206 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.3117268681526184, + "learning_rate": 1.0785305414839213e-05, + "loss": 0.1858, + "step": 19172, + "teacher_loss": 0.17180940508842468 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.14585213363170624, + "learning_rate": 1.0783125713595075e-05, + "loss": 0.2007, + "step": 19173, + "teacher_loss": 0.20678508281707764 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.5162680149078369, + "learning_rate": 1.078094610903122e-05, + "loss": 0.3348, + "step": 19174, + "teacher_loss": 0.31466004252433777 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.37309640645980835, + "learning_rate": 1.0778766601197624e-05, + "loss": 0.2163, + "step": 19175, + "teacher_loss": 0.1989280879497528 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.440687894821167, + "learning_rate": 1.0776587190144254e-05, + "loss": 0.2074, + "step": 19176, + "teacher_loss": 0.18144112825393677 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.24271106719970703, + "learning_rate": 1.0774407875921078e-05, + "loss": 0.1836, + "step": 19177, + "teacher_loss": 0.17701321840286255 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.27369099855422974, + "learning_rate": 1.0772228658578057e-05, + "loss": 0.2277, + "step": 19178, + "teacher_loss": 0.22257909178733826 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.2757619023323059, + "learning_rate": 1.077004953816516e-05, + "loss": 0.1951, + "step": 19179, + "teacher_loss": 0.18611925840377808 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.6268560886383057, + "learning_rate": 1.076787051473235e-05, + "loss": 0.3945, + "step": 19180, + "teacher_loss": 0.3687174916267395 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.7088404893875122, + "learning_rate": 1.0765691588329567e-05, + "loss": 0.3506, + "step": 19181, + "teacher_loss": 0.3107469081878662 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.1919446736574173, + "learning_rate": 1.0763512759006792e-05, + "loss": 0.1938, + "step": 19182, + "teacher_loss": 0.19402126967906952 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.5411832332611084, + "learning_rate": 1.0761334026813966e-05, + "loss": 0.2122, + "step": 19183, + "teacher_loss": 0.17563802003860474 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.36685672402381897, + "learning_rate": 1.0759155391801035e-05, + "loss": 0.2956, + "step": 19184, + "teacher_loss": 0.28765660524368286 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.3054330348968506, + "learning_rate": 1.0756976854017967e-05, + "loss": 0.1888, + "step": 19185, + "teacher_loss": 0.17584654688835144 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.33647847175598145, + "learning_rate": 1.0754798413514694e-05, + "loss": 0.1706, + "step": 19186, + "teacher_loss": 0.15215066075325012 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.5734248757362366, + "learning_rate": 1.0752620070341162e-05, + "loss": 0.2574, + "step": 19187, + "teacher_loss": 0.22229892015457153 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.2776779234409332, + "learning_rate": 1.0750441824547323e-05, + "loss": 0.2341, + "step": 19188, + "teacher_loss": 0.2292025089263916 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.7565782070159912, + "learning_rate": 1.0748263676183109e-05, + "loss": 0.2633, + "step": 19189, + "teacher_loss": 0.2085166573524475 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.21718642115592957, + "learning_rate": 1.074608562529846e-05, + "loss": 0.221, + "step": 19190, + "teacher_loss": 0.22139029204845428 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.14208725094795227, + "learning_rate": 1.0743907671943317e-05, + "loss": 0.119, + "step": 19191, + "teacher_loss": 0.1164025217294693 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.2378544807434082, + "learning_rate": 1.0741729816167612e-05, + "loss": 0.169, + "step": 19192, + "teacher_loss": 0.1613958775997162 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.7307143807411194, + "learning_rate": 1.073955205802127e-05, + "loss": 0.4817, + "step": 19193, + "teacher_loss": 0.4540741443634033 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.8807600736618042, + "learning_rate": 1.073737439755423e-05, + "loss": 0.2908, + "step": 19194, + "teacher_loss": 0.2252986878156662 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.3066364526748657, + "learning_rate": 1.0735196834816419e-05, + "loss": 0.2916, + "step": 19195, + "teacher_loss": 0.2899734675884247 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.37363147735595703, + "learning_rate": 1.0733019369857747e-05, + "loss": 0.2394, + "step": 19196, + "teacher_loss": 0.2245309203863144 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.5435505509376526, + "learning_rate": 1.0730842002728158e-05, + "loss": 0.2446, + "step": 19197, + "teacher_loss": 0.21135690808296204 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.5123671293258667, + "learning_rate": 1.0728664733477558e-05, + "loss": 0.291, + "step": 19198, + "teacher_loss": 0.266435444355011 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.9729100465774536, + "learning_rate": 1.072648756215587e-05, + "loss": 0.3255, + "step": 19199, + "teacher_loss": 0.2535231411457062 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.3581695854663849, + "learning_rate": 1.0724310488813008e-05, + "loss": 0.2028, + "step": 19200, + "teacher_loss": 0.18550726771354675 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.5466798543930054, + "learning_rate": 1.0722133513498888e-05, + "loss": 0.3936, + "step": 19201, + "teacher_loss": 0.3765909671783447 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.5606138706207275, + "learning_rate": 1.0719956636263424e-05, + "loss": 0.3024, + "step": 19202, + "teacher_loss": 0.27369555830955505 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.3890919089317322, + "learning_rate": 1.0717779857156516e-05, + "loss": 0.2201, + "step": 19203, + "teacher_loss": 0.20130646228790283 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.86761873960495, + "learning_rate": 1.071560317622808e-05, + "loss": 0.5903, + "step": 19204, + "teacher_loss": 0.5594816207885742 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.4426562786102295, + "learning_rate": 1.0713426593528023e-05, + "loss": 0.2194, + "step": 19205, + "teacher_loss": 0.19463102519512177 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.21859760582447052, + "learning_rate": 1.0711250109106233e-05, + "loss": 0.2037, + "step": 19206, + "teacher_loss": 0.2020566463470459 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.5120850205421448, + "learning_rate": 1.0709073723012628e-05, + "loss": 0.2194, + "step": 19207, + "teacher_loss": 0.18693125247955322 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.39459556341171265, + "learning_rate": 1.0706897435297097e-05, + "loss": 0.3121, + "step": 19208, + "teacher_loss": 0.30298829078674316 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.708952009677887, + "learning_rate": 1.0704721246009527e-05, + "loss": 0.2542, + "step": 19209, + "teacher_loss": 0.20363706350326538 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.13194632530212402, + "learning_rate": 1.070254515519983e-05, + "loss": 0.1621, + "step": 19210, + "teacher_loss": 0.16545280814170837 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.22494731843471527, + "learning_rate": 1.0700369162917885e-05, + "loss": 0.1636, + "step": 19211, + "teacher_loss": 0.1567291021347046 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.4305468797683716, + "learning_rate": 1.0698193269213584e-05, + "loss": 0.2041, + "step": 19212, + "teacher_loss": 0.17893174290657043 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.6655075550079346, + "learning_rate": 1.0696017474136815e-05, + "loss": 0.2289, + "step": 19213, + "teacher_loss": 0.18036577105522156 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.48842379450798035, + "learning_rate": 1.0693841777737463e-05, + "loss": 0.2826, + "step": 19214, + "teacher_loss": 0.259742796421051 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.5211811065673828, + "learning_rate": 1.0691666180065403e-05, + "loss": 0.2538, + "step": 19215, + "teacher_loss": 0.22405563294887543 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.5416889190673828, + "learning_rate": 1.0689490681170524e-05, + "loss": 0.1972, + "step": 19216, + "teacher_loss": 0.15895453095436096 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.4713010787963867, + "learning_rate": 1.0687315281102701e-05, + "loss": 0.278, + "step": 19217, + "teacher_loss": 0.25648993253707886 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.433030366897583, + "learning_rate": 1.068513997991181e-05, + "loss": 0.3604, + "step": 19218, + "teacher_loss": 0.35235482454299927 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.33772972226142883, + "learning_rate": 1.0682964777647716e-05, + "loss": 0.2082, + "step": 19219, + "teacher_loss": 0.19380691647529602 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.29328209161758423, + "learning_rate": 1.0680789674360305e-05, + "loss": 0.1914, + "step": 19220, + "teacher_loss": 0.18006017804145813 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.2782125174999237, + "learning_rate": 1.0678614670099433e-05, + "loss": 0.2892, + "step": 19221, + "teacher_loss": 0.29039037227630615 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.8001796007156372, + "learning_rate": 1.0676439764914968e-05, + "loss": 0.2811, + "step": 19222, + "teacher_loss": 0.22346360981464386 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.1304096281528473, + "learning_rate": 1.0674264958856779e-05, + "loss": 0.1769, + "step": 19223, + "teacher_loss": 0.1820920705795288 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.48276597261428833, + "learning_rate": 1.0672090251974728e-05, + "loss": 0.2425, + "step": 19224, + "teacher_loss": 0.2157881259918213 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.4614584445953369, + "learning_rate": 1.0669915644318664e-05, + "loss": 0.2024, + "step": 19225, + "teacher_loss": 0.17364266514778137 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.3686111569404602, + "learning_rate": 1.0667741135938458e-05, + "loss": 0.2103, + "step": 19226, + "teacher_loss": 0.192699134349823 + }, + { + "compression_loss": 0.0, + "epoch": 3.47, + "label_loss": 0.4633331298828125, + "learning_rate": 1.066556672688396e-05, + "loss": 0.2645, + "step": 19227, + "teacher_loss": 0.2424132525920868 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.3806978464126587, + "learning_rate": 1.0663392417205016e-05, + "loss": 0.3477, + "step": 19228, + "teacher_loss": 0.3440232276916504 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.24845215678215027, + "learning_rate": 1.0661218206951486e-05, + "loss": 0.1902, + "step": 19229, + "teacher_loss": 0.1837640255689621 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.3281462788581848, + "learning_rate": 1.0659044096173218e-05, + "loss": 0.1715, + "step": 19230, + "teacher_loss": 0.15404045581817627 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.5688304901123047, + "learning_rate": 1.0656870084920043e-05, + "loss": 0.2116, + "step": 19231, + "teacher_loss": 0.17193885147571564 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.44448840618133545, + "learning_rate": 1.0654696173241826e-05, + "loss": 0.2892, + "step": 19232, + "teacher_loss": 0.2719712257385254 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.5308183431625366, + "learning_rate": 1.0652522361188395e-05, + "loss": 0.2493, + "step": 19233, + "teacher_loss": 0.21802425384521484 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.587203323841095, + "learning_rate": 1.0650348648809585e-05, + "loss": 0.2264, + "step": 19234, + "teacher_loss": 0.18635180592536926 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.2512023448944092, + "learning_rate": 1.064817503615525e-05, + "loss": 0.1918, + "step": 19235, + "teacher_loss": 0.1852271556854248 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.7842098474502563, + "learning_rate": 1.064600152327521e-05, + "loss": 0.2647, + "step": 19236, + "teacher_loss": 0.20699161291122437 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.5155953168869019, + "learning_rate": 1.0643828110219298e-05, + "loss": 0.2568, + "step": 19237, + "teacher_loss": 0.22805780172348022 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.7367535829544067, + "learning_rate": 1.0641654797037349e-05, + "loss": 0.2614, + "step": 19238, + "teacher_loss": 0.20855720341205597 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.3689441382884979, + "learning_rate": 1.0639481583779192e-05, + "loss": 0.2306, + "step": 19239, + "teacher_loss": 0.21524935960769653 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.3272366523742676, + "learning_rate": 1.0637308470494646e-05, + "loss": 0.1857, + "step": 19240, + "teacher_loss": 0.16994619369506836 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.9207844734191895, + "learning_rate": 1.0635135457233533e-05, + "loss": 0.257, + "step": 19241, + "teacher_loss": 0.1832602620124817 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.3810318112373352, + "learning_rate": 1.0632962544045682e-05, + "loss": 0.1797, + "step": 19242, + "teacher_loss": 0.15732485055923462 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.2114868313074112, + "learning_rate": 1.0630789730980909e-05, + "loss": 0.177, + "step": 19243, + "teacher_loss": 0.17320480942726135 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.264034241437912, + "learning_rate": 1.0628617018089019e-05, + "loss": 0.2025, + "step": 19244, + "teacher_loss": 0.19565868377685547 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.633360743522644, + "learning_rate": 1.0626444405419844e-05, + "loss": 0.2222, + "step": 19245, + "teacher_loss": 0.1764664500951767 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.2613506317138672, + "learning_rate": 1.0624271893023184e-05, + "loss": 0.1897, + "step": 19246, + "teacher_loss": 0.18168683350086212 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.8338223695755005, + "learning_rate": 1.0622099480948845e-05, + "loss": 0.3309, + "step": 19247, + "teacher_loss": 0.2750103175640106 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.4363275468349457, + "learning_rate": 1.0619927169246644e-05, + "loss": 0.1909, + "step": 19248, + "teacher_loss": 0.16368280351161957 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.5929557681083679, + "learning_rate": 1.0617754957966382e-05, + "loss": 0.2242, + "step": 19249, + "teacher_loss": 0.18324080109596252 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.29465240240097046, + "learning_rate": 1.0615582847157854e-05, + "loss": 0.2511, + "step": 19250, + "teacher_loss": 0.24624481797218323 + }, + { + "epoch": 3.48, + "eval_exact_match": 79.98107852412488, + "eval_f1": 87.28192007756861, + "step": 19250 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.46034079790115356, + "learning_rate": 1.0613410836870873e-05, + "loss": 0.3851, + "step": 19251, + "teacher_loss": 0.37673109769821167 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.8422898054122925, + "learning_rate": 1.0611238927155229e-05, + "loss": 0.4861, + "step": 19252, + "teacher_loss": 0.44649890065193176 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.40830233693122864, + "learning_rate": 1.0609067118060712e-05, + "loss": 0.2393, + "step": 19253, + "teacher_loss": 0.22055616974830627 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.40980762243270874, + "learning_rate": 1.0606895409637127e-05, + "loss": 0.1835, + "step": 19254, + "teacher_loss": 0.15840867161750793 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.21113336086273193, + "learning_rate": 1.0604723801934262e-05, + "loss": 0.1898, + "step": 19255, + "teacher_loss": 0.18739478290081024 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 1.049504041671753, + "learning_rate": 1.060255229500189e-05, + "loss": 0.4396, + "step": 19256, + "teacher_loss": 0.3717900514602661 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.5374554991722107, + "learning_rate": 1.060038088888982e-05, + "loss": 0.2123, + "step": 19257, + "teacher_loss": 0.17618578672409058 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.23970146477222443, + "learning_rate": 1.0598209583647828e-05, + "loss": 0.2371, + "step": 19258, + "teacher_loss": 0.23686037957668304 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.5487461090087891, + "learning_rate": 1.0596038379325683e-05, + "loss": 0.2111, + "step": 19259, + "teacher_loss": 0.1736009120941162 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.3135746717453003, + "learning_rate": 1.0593867275973184e-05, + "loss": 0.2044, + "step": 19260, + "teacher_loss": 0.19228631258010864 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.1259211152791977, + "learning_rate": 1.0591696273640094e-05, + "loss": 0.1535, + "step": 19261, + "teacher_loss": 0.15651443600654602 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 1.0688755512237549, + "learning_rate": 1.058952537237619e-05, + "loss": 0.3404, + "step": 19262, + "teacher_loss": 0.2594517469406128 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.05794157832860947, + "learning_rate": 1.0587354572231246e-05, + "loss": 0.1509, + "step": 19263, + "teacher_loss": 0.1612202525138855 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 1.153679370880127, + "learning_rate": 1.0585183873255032e-05, + "loss": 0.2392, + "step": 19264, + "teacher_loss": 0.13755811750888824 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.3633022904396057, + "learning_rate": 1.0583013275497318e-05, + "loss": 0.1657, + "step": 19265, + "teacher_loss": 0.14370760321617126 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.38161903619766235, + "learning_rate": 1.0580842779007862e-05, + "loss": 0.2243, + "step": 19266, + "teacher_loss": 0.2068396657705307 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.5384604930877686, + "learning_rate": 1.0578672383836437e-05, + "loss": 0.1892, + "step": 19267, + "teacher_loss": 0.15040919184684753 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.43093258142471313, + "learning_rate": 1.05765020900328e-05, + "loss": 0.1647, + "step": 19268, + "teacher_loss": 0.13507144153118134 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.38681650161743164, + "learning_rate": 1.05743318976467e-05, + "loss": 0.2567, + "step": 19269, + "teacher_loss": 0.24221184849739075 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.27728545665740967, + "learning_rate": 1.0572161806727908e-05, + "loss": 0.1872, + "step": 19270, + "teacher_loss": 0.1772192269563675 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.7905272841453552, + "learning_rate": 1.0569991817326166e-05, + "loss": 0.8354, + "step": 19271, + "teacher_loss": 0.8404158353805542 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.4105336666107178, + "learning_rate": 1.056782192949123e-05, + "loss": 0.2087, + "step": 19272, + "teacher_loss": 0.18627750873565674 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.1907138228416443, + "learning_rate": 1.0565652143272851e-05, + "loss": 0.2057, + "step": 19273, + "teacher_loss": 0.20741309225559235 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.3579472303390503, + "learning_rate": 1.0563482458720773e-05, + "loss": 0.2178, + "step": 19274, + "teacher_loss": 0.20220181345939636 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.42955437302589417, + "learning_rate": 1.0561312875884739e-05, + "loss": 0.3227, + "step": 19275, + "teacher_loss": 0.3108643591403961 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.7240515947341919, + "learning_rate": 1.0559143394814494e-05, + "loss": 0.2373, + "step": 19276, + "teacher_loss": 0.1831989735364914 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.451634019613266, + "learning_rate": 1.0556974015559776e-05, + "loss": 0.3309, + "step": 19277, + "teacher_loss": 0.31752827763557434 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.44515055418014526, + "learning_rate": 1.0554804738170322e-05, + "loss": 0.2004, + "step": 19278, + "teacher_loss": 0.17319729924201965 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.26225972175598145, + "learning_rate": 1.0552635562695871e-05, + "loss": 0.1444, + "step": 19279, + "teacher_loss": 0.13127672672271729 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.3869149386882782, + "learning_rate": 1.0550466489186156e-05, + "loss": 0.2881, + "step": 19280, + "teacher_loss": 0.27712059020996094 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.3492221236228943, + "learning_rate": 1.05482975176909e-05, + "loss": 0.2403, + "step": 19281, + "teacher_loss": 0.2282455414533615 + }, + { + "compression_loss": 0.0, + "epoch": 3.48, + "label_loss": 0.4371429681777954, + "learning_rate": 1.0546128648259828e-05, + "loss": 0.2738, + "step": 19282, + "teacher_loss": 0.25567546486854553 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.48671260476112366, + "learning_rate": 1.0543959880942684e-05, + "loss": 0.2447, + "step": 19283, + "teacher_loss": 0.21780069172382355 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.22923651337623596, + "learning_rate": 1.0541791215789175e-05, + "loss": 0.1583, + "step": 19284, + "teacher_loss": 0.15043804049491882 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.5727065801620483, + "learning_rate": 1.0539622652849026e-05, + "loss": 0.4167, + "step": 19285, + "teacher_loss": 0.3993412256240845 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.8295423984527588, + "learning_rate": 1.0537454192171958e-05, + "loss": 0.2688, + "step": 19286, + "teacher_loss": 0.20652857422828674 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.2851209044456482, + "learning_rate": 1.0535285833807684e-05, + "loss": 0.1911, + "step": 19287, + "teacher_loss": 0.1806366890668869 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.2591969966888428, + "learning_rate": 1.0533117577805921e-05, + "loss": 0.21, + "step": 19288, + "teacher_loss": 0.20455129444599152 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.20610500872135162, + "learning_rate": 1.0530949424216382e-05, + "loss": 0.1519, + "step": 19289, + "teacher_loss": 0.145859032869339 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.5352863073348999, + "learning_rate": 1.0528781373088772e-05, + "loss": 0.2666, + "step": 19290, + "teacher_loss": 0.23679813742637634 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.39734768867492676, + "learning_rate": 1.0526613424472797e-05, + "loss": 0.1678, + "step": 19291, + "teacher_loss": 0.14229996502399445 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.31339430809020996, + "learning_rate": 1.0524445578418168e-05, + "loss": 0.3013, + "step": 19292, + "teacher_loss": 0.2999735176563263 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.2556731402873993, + "learning_rate": 1.0522277834974586e-05, + "loss": 0.2163, + "step": 19293, + "teacher_loss": 0.2119034081697464 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.4354918599128723, + "learning_rate": 1.052011019419174e-05, + "loss": 0.2511, + "step": 19294, + "teacher_loss": 0.23063281178474426 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.22145043313503265, + "learning_rate": 1.0517942656119344e-05, + "loss": 0.2039, + "step": 19295, + "teacher_loss": 0.20190554857254028 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.5245985984802246, + "learning_rate": 1.0515775220807083e-05, + "loss": 0.2546, + "step": 19296, + "teacher_loss": 0.22458285093307495 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.29969215393066406, + "learning_rate": 1.0513607888304646e-05, + "loss": 0.2778, + "step": 19297, + "teacher_loss": 0.2753612995147705 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.13588854670524597, + "learning_rate": 1.0511440658661736e-05, + "loss": 0.1707, + "step": 19298, + "teacher_loss": 0.1745852828025818 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.17867204546928406, + "learning_rate": 1.0509273531928031e-05, + "loss": 0.1957, + "step": 19299, + "teacher_loss": 0.1975673884153366 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.5238090753555298, + "learning_rate": 1.050710650815322e-05, + "loss": 0.2905, + "step": 19300, + "teacher_loss": 0.26462483406066895 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.520237922668457, + "learning_rate": 1.0504939587386986e-05, + "loss": 0.2013, + "step": 19301, + "teacher_loss": 0.16584312915802002 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.5799924731254578, + "learning_rate": 1.0502772769679014e-05, + "loss": 0.2181, + "step": 19302, + "teacher_loss": 0.17792558670043945 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.5792575478553772, + "learning_rate": 1.050060605507898e-05, + "loss": 0.3452, + "step": 19303, + "teacher_loss": 0.3191969394683838 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.1431734263896942, + "learning_rate": 1.049843944363655e-05, + "loss": 0.1469, + "step": 19304, + "teacher_loss": 0.1472712606191635 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.40714800357818604, + "learning_rate": 1.0496272935401416e-05, + "loss": 0.2605, + "step": 19305, + "teacher_loss": 0.24423368275165558 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.3165675401687622, + "learning_rate": 1.049410653042324e-05, + "loss": 0.2296, + "step": 19306, + "teacher_loss": 0.2199425846338272 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.8372650146484375, + "learning_rate": 1.0491940228751683e-05, + "loss": 0.2786, + "step": 19307, + "teacher_loss": 0.21653325855731964 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.20374202728271484, + "learning_rate": 1.0489774030436433e-05, + "loss": 0.2019, + "step": 19308, + "teacher_loss": 0.20172226428985596 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.3260893225669861, + "learning_rate": 1.0487607935527136e-05, + "loss": 0.2325, + "step": 19309, + "teacher_loss": 0.2220931053161621 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.3871397376060486, + "learning_rate": 1.0485441944073458e-05, + "loss": 0.2053, + "step": 19310, + "teacher_loss": 0.18509185314178467 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.21755719184875488, + "learning_rate": 1.0483276056125063e-05, + "loss": 0.1567, + "step": 19311, + "teacher_loss": 0.14996322989463806 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.25184404850006104, + "learning_rate": 1.048111027173161e-05, + "loss": 0.2082, + "step": 19312, + "teacher_loss": 0.2033727467060089 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.36648911237716675, + "learning_rate": 1.0478944590942745e-05, + "loss": 0.2385, + "step": 19313, + "teacher_loss": 0.22424963116645813 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.6335453987121582, + "learning_rate": 1.0476779013808126e-05, + "loss": 0.2557, + "step": 19314, + "teacher_loss": 0.21370047330856323 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.3886480927467346, + "learning_rate": 1.0474613540377405e-05, + "loss": 0.1956, + "step": 19315, + "teacher_loss": 0.17412322759628296 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.3643404245376587, + "learning_rate": 1.0472448170700227e-05, + "loss": 0.1942, + "step": 19316, + "teacher_loss": 0.1753401756286621 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.2642322778701782, + "learning_rate": 1.0470282904826238e-05, + "loss": 0.1774, + "step": 19317, + "teacher_loss": 0.16774627566337585 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.3977299630641937, + "learning_rate": 1.0468117742805086e-05, + "loss": 0.2492, + "step": 19318, + "teacher_loss": 0.23269028961658478 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.45396262407302856, + "learning_rate": 1.0465952684686396e-05, + "loss": 0.3028, + "step": 19319, + "teacher_loss": 0.2860533893108368 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.21616527438163757, + "learning_rate": 1.0463787730519829e-05, + "loss": 0.1515, + "step": 19320, + "teacher_loss": 0.14435552060604095 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.49721047282218933, + "learning_rate": 1.0461622880355003e-05, + "loss": 0.2598, + "step": 19321, + "teacher_loss": 0.23338137567043304 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.4627741575241089, + "learning_rate": 1.0459458134241558e-05, + "loss": 0.2447, + "step": 19322, + "teacher_loss": 0.2204170823097229 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.5647050738334656, + "learning_rate": 1.0457293492229124e-05, + "loss": 0.4037, + "step": 19323, + "teacher_loss": 0.38577407598495483 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.5595830678939819, + "learning_rate": 1.0455128954367332e-05, + "loss": 0.1945, + "step": 19324, + "teacher_loss": 0.15398472547531128 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.5528039932250977, + "learning_rate": 1.0452964520705808e-05, + "loss": 0.2231, + "step": 19325, + "teacher_loss": 0.1864643096923828 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.6555163860321045, + "learning_rate": 1.0450800191294171e-05, + "loss": 0.2811, + "step": 19326, + "teacher_loss": 0.23948630690574646 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.2831399440765381, + "learning_rate": 1.0448635966182049e-05, + "loss": 0.2835, + "step": 19327, + "teacher_loss": 0.283489853143692 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.7469034194946289, + "learning_rate": 1.0446471845419063e-05, + "loss": 0.2278, + "step": 19328, + "teacher_loss": 0.17009249329566956 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.3104090690612793, + "learning_rate": 1.0444307829054814e-05, + "loss": 0.1969, + "step": 19329, + "teacher_loss": 0.18427559733390808 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.6383261680603027, + "learning_rate": 1.0442143917138939e-05, + "loss": 0.3688, + "step": 19330, + "teacher_loss": 0.33880770206451416 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.24821141362190247, + "learning_rate": 1.0439980109721035e-05, + "loss": 0.1548, + "step": 19331, + "teacher_loss": 0.1444035768508911 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.34593719244003296, + "learning_rate": 1.0437816406850706e-05, + "loss": 0.1571, + "step": 19332, + "teacher_loss": 0.13606838881969452 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.4385605454444885, + "learning_rate": 1.0435652808577583e-05, + "loss": 0.2252, + "step": 19333, + "teacher_loss": 0.2015310376882553 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.3425580859184265, + "learning_rate": 1.043348931495125e-05, + "loss": 0.1793, + "step": 19334, + "teacher_loss": 0.16111284494400024 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.33186042308807373, + "learning_rate": 1.0431325926021315e-05, + "loss": 0.2403, + "step": 19335, + "teacher_loss": 0.23012122511863708 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.6210047006607056, + "learning_rate": 1.0429162641837382e-05, + "loss": 0.7844, + "step": 19336, + "teacher_loss": 0.8025637865066528 + }, + { + "compression_loss": 0.0, + "epoch": 3.49, + "label_loss": 0.5344265103340149, + "learning_rate": 1.0426999462449045e-05, + "loss": 0.218, + "step": 19337, + "teacher_loss": 0.18279507756233215 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.3936270773410797, + "learning_rate": 1.0424836387905895e-05, + "loss": 0.1728, + "step": 19338, + "teacher_loss": 0.14822138845920563 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.2597653865814209, + "learning_rate": 1.0422673418257536e-05, + "loss": 0.19, + "step": 19339, + "teacher_loss": 0.18227365612983704 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.3911869525909424, + "learning_rate": 1.042051055355355e-05, + "loss": 0.1489, + "step": 19340, + "teacher_loss": 0.12196915596723557 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.4995865225791931, + "learning_rate": 1.0418347793843524e-05, + "loss": 0.2675, + "step": 19341, + "teacher_loss": 0.24170859158039093 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.9081000089645386, + "learning_rate": 1.0416185139177048e-05, + "loss": 0.501, + "step": 19342, + "teacher_loss": 0.45581477880477905 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.2463749349117279, + "learning_rate": 1.041402258960371e-05, + "loss": 0.1725, + "step": 19343, + "teacher_loss": 0.16426461935043335 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.4831945300102234, + "learning_rate": 1.041186014517308e-05, + "loss": 0.2348, + "step": 19344, + "teacher_loss": 0.20718804001808167 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.23190772533416748, + "learning_rate": 1.0409697805934737e-05, + "loss": 0.2275, + "step": 19345, + "teacher_loss": 0.22698181867599487 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.34947869181632996, + "learning_rate": 1.0407535571938265e-05, + "loss": 0.2124, + "step": 19346, + "teacher_loss": 0.197207510471344 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.5225273370742798, + "learning_rate": 1.0405373443233234e-05, + "loss": 0.2165, + "step": 19347, + "teacher_loss": 0.18245337903499603 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.2556726932525635, + "learning_rate": 1.040321141986921e-05, + "loss": 0.1364, + "step": 19348, + "teacher_loss": 0.12319750338792801 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.6717689037322998, + "learning_rate": 1.040104950189577e-05, + "loss": 0.3268, + "step": 19349, + "teacher_loss": 0.28844213485717773 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.6527443528175354, + "learning_rate": 1.0398887689362478e-05, + "loss": 0.3123, + "step": 19350, + "teacher_loss": 0.2744565010070801 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.809870183467865, + "learning_rate": 1.039672598231889e-05, + "loss": 0.2892, + "step": 19351, + "teacher_loss": 0.23138704895973206 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.5554569959640503, + "learning_rate": 1.0394564380814578e-05, + "loss": 0.2531, + "step": 19352, + "teacher_loss": 0.21948108077049255 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.11442992091178894, + "learning_rate": 1.0392402884899102e-05, + "loss": 0.14, + "step": 19353, + "teacher_loss": 0.1428043395280838 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.2875497341156006, + "learning_rate": 1.0390241494622003e-05, + "loss": 0.2001, + "step": 19354, + "teacher_loss": 0.19038638472557068 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.717458963394165, + "learning_rate": 1.0388080210032856e-05, + "loss": 0.2672, + "step": 19355, + "teacher_loss": 0.2171338051557541 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.2764398455619812, + "learning_rate": 1.0385919031181199e-05, + "loss": 0.1975, + "step": 19356, + "teacher_loss": 0.1886882185935974 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.5169720649719238, + "learning_rate": 1.0383757958116576e-05, + "loss": 0.189, + "step": 19357, + "teacher_loss": 0.15260049700737 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.37131014466285706, + "learning_rate": 1.0381596990888557e-05, + "loss": 0.1948, + "step": 19358, + "teacher_loss": 0.17515847086906433 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.33470162749290466, + "learning_rate": 1.0379436129546667e-05, + "loss": 0.2349, + "step": 19359, + "teacher_loss": 0.22376102209091187 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.4096849262714386, + "learning_rate": 1.0377275374140448e-05, + "loss": 0.1906, + "step": 19360, + "teacher_loss": 0.16623128950595856 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.4260618984699249, + "learning_rate": 1.0375114724719452e-05, + "loss": 0.2383, + "step": 19361, + "teacher_loss": 0.21742624044418335 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.5361185073852539, + "learning_rate": 1.0372954181333206e-05, + "loss": 0.2323, + "step": 19362, + "teacher_loss": 0.19852572679519653 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.5701732635498047, + "learning_rate": 1.0370793744031245e-05, + "loss": 0.2734, + "step": 19363, + "teacher_loss": 0.2403847724199295 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.3092060685157776, + "learning_rate": 1.0368633412863111e-05, + "loss": 0.2307, + "step": 19364, + "teacher_loss": 0.2219313532114029 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.6654055118560791, + "learning_rate": 1.0366473187878324e-05, + "loss": 0.5202, + "step": 19365, + "teacher_loss": 0.5040406584739685 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.3943619728088379, + "learning_rate": 1.0364313069126419e-05, + "loss": 0.2207, + "step": 19366, + "teacher_loss": 0.2014119178056717 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.7261480689048767, + "learning_rate": 1.0362153056656908e-05, + "loss": 0.2408, + "step": 19367, + "teacher_loss": 0.18682153522968292 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.3227559030056, + "learning_rate": 1.035999315051933e-05, + "loss": 0.2871, + "step": 19368, + "teacher_loss": 0.28309759497642517 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.24620960652828217, + "learning_rate": 1.0357833350763196e-05, + "loss": 0.215, + "step": 19369, + "teacher_loss": 0.2114812731742859 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.759568452835083, + "learning_rate": 1.0355673657438021e-05, + "loss": 0.2296, + "step": 19370, + "teacher_loss": 0.1707087755203247 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.6023832559585571, + "learning_rate": 1.035351407059333e-05, + "loss": 0.2357, + "step": 19371, + "teacher_loss": 0.19490975141525269 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.5730940699577332, + "learning_rate": 1.035135459027863e-05, + "loss": 0.2893, + "step": 19372, + "teacher_loss": 0.2577604651451111 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.32742127776145935, + "learning_rate": 1.0349195216543426e-05, + "loss": 0.3884, + "step": 19373, + "teacher_loss": 0.3951761722564697 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.1408384144306183, + "learning_rate": 1.0347035949437237e-05, + "loss": 0.1826, + "step": 19374, + "teacher_loss": 0.18722745776176453 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.35834234952926636, + "learning_rate": 1.0344876789009563e-05, + "loss": 0.2778, + "step": 19375, + "teacher_loss": 0.2688608169555664 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.475138396024704, + "learning_rate": 1.0342717735309905e-05, + "loss": 0.234, + "step": 19376, + "teacher_loss": 0.2072281837463379 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.3047199845314026, + "learning_rate": 1.0340558788387768e-05, + "loss": 0.2227, + "step": 19377, + "teacher_loss": 0.21363796293735504 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.36014455556869507, + "learning_rate": 1.0338399948292651e-05, + "loss": 0.1893, + "step": 19378, + "teacher_loss": 0.170371413230896 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.26450949907302856, + "learning_rate": 1.0336241215074039e-05, + "loss": 0.1898, + "step": 19379, + "teacher_loss": 0.18149280548095703 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.22230613231658936, + "learning_rate": 1.033408258878144e-05, + "loss": 0.2809, + "step": 19380, + "teacher_loss": 0.28739115595817566 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.5090571045875549, + "learning_rate": 1.033192406946434e-05, + "loss": 0.2245, + "step": 19381, + "teacher_loss": 0.1929173469543457 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.323691189289093, + "learning_rate": 1.0329765657172216e-05, + "loss": 0.2136, + "step": 19382, + "teacher_loss": 0.20132896304130554 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.9403353929519653, + "learning_rate": 1.0327607351954575e-05, + "loss": 0.2712, + "step": 19383, + "teacher_loss": 0.1967974305152893 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.5049049854278564, + "learning_rate": 1.0325449153860884e-05, + "loss": 0.4794, + "step": 19384, + "teacher_loss": 0.47658807039260864 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.6250417232513428, + "learning_rate": 1.0323291062940628e-05, + "loss": 0.2304, + "step": 19385, + "teacher_loss": 0.18660229444503784 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.5964815616607666, + "learning_rate": 1.0321133079243285e-05, + "loss": 0.2614, + "step": 19386, + "teacher_loss": 0.2242087721824646 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.2767605483531952, + "learning_rate": 1.0318975202818333e-05, + "loss": 0.149, + "step": 19387, + "teacher_loss": 0.134830042719841 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.6674554347991943, + "learning_rate": 1.0316817433715246e-05, + "loss": 0.2743, + "step": 19388, + "teacher_loss": 0.23057660460472107 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.1375901699066162, + "learning_rate": 1.0314659771983493e-05, + "loss": 0.2116, + "step": 19389, + "teacher_loss": 0.21980033814907074 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.5514234900474548, + "learning_rate": 1.0312502217672547e-05, + "loss": 0.2367, + "step": 19390, + "teacher_loss": 0.2017441689968109 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.5218294262886047, + "learning_rate": 1.0310344770831875e-05, + "loss": 0.2714, + "step": 19391, + "teacher_loss": 0.24358510971069336 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.4563637375831604, + "learning_rate": 1.0308187431510927e-05, + "loss": 0.3057, + "step": 19392, + "teacher_loss": 0.28897643089294434 + }, + { + "compression_loss": 0.0, + "epoch": 3.5, + "label_loss": 0.5893850922584534, + "learning_rate": 1.0306030199759181e-05, + "loss": 0.2697, + "step": 19393, + "teacher_loss": 0.23421460390090942 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.22820015251636505, + "learning_rate": 1.0303873075626089e-05, + "loss": 0.2006, + "step": 19394, + "teacher_loss": 0.19748564064502716 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.5321241617202759, + "learning_rate": 1.0301716059161103e-05, + "loss": 0.2146, + "step": 19395, + "teacher_loss": 0.17927706241607666 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.370492160320282, + "learning_rate": 1.0299559150413685e-05, + "loss": 0.285, + "step": 19396, + "teacher_loss": 0.2755116820335388 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.511408269405365, + "learning_rate": 1.0297402349433286e-05, + "loss": 0.2137, + "step": 19397, + "teacher_loss": 0.18058043718338013 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.3674183189868927, + "learning_rate": 1.0295245656269346e-05, + "loss": 0.2294, + "step": 19398, + "teacher_loss": 0.21405398845672607 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.32813528180122375, + "learning_rate": 1.0293089070971322e-05, + "loss": 0.2423, + "step": 19399, + "teacher_loss": 0.2327880561351776 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.3610363006591797, + "learning_rate": 1.029093259358865e-05, + "loss": 0.2426, + "step": 19400, + "teacher_loss": 0.22943758964538574 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.37287163734436035, + "learning_rate": 1.0288776224170776e-05, + "loss": 0.2092, + "step": 19401, + "teacher_loss": 0.19098162651062012 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.27948683500289917, + "learning_rate": 1.028661996276714e-05, + "loss": 0.2072, + "step": 19402, + "teacher_loss": 0.19917762279510498 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.7710708379745483, + "learning_rate": 1.028446380942718e-05, + "loss": 0.2988, + "step": 19403, + "teacher_loss": 0.24627827107906342 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.4671623706817627, + "learning_rate": 1.0282307764200319e-05, + "loss": 0.1893, + "step": 19404, + "teacher_loss": 0.15840734541416168 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.5016294121742249, + "learning_rate": 1.0280151827136e-05, + "loss": 0.2472, + "step": 19405, + "teacher_loss": 0.21895377337932587 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.5507128834724426, + "learning_rate": 1.0277995998283652e-05, + "loss": 0.266, + "step": 19406, + "teacher_loss": 0.2343181073665619 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.8995944857597351, + "learning_rate": 1.0275840277692698e-05, + "loss": 0.3289, + "step": 19407, + "teacher_loss": 0.26543593406677246 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.3049280047416687, + "learning_rate": 1.0273684665412557e-05, + "loss": 0.2274, + "step": 19408, + "teacher_loss": 0.21874544024467468 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.4382936358451843, + "learning_rate": 1.0271529161492662e-05, + "loss": 0.2408, + "step": 19409, + "teacher_loss": 0.21882781386375427 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.6913855671882629, + "learning_rate": 1.0269373765982426e-05, + "loss": 0.2669, + "step": 19410, + "teacher_loss": 0.21969617903232574 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.3898928165435791, + "learning_rate": 1.0267218478931261e-05, + "loss": 0.2026, + "step": 19411, + "teacher_loss": 0.18173912167549133 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.66672682762146, + "learning_rate": 1.0265063300388591e-05, + "loss": 0.2145, + "step": 19412, + "teacher_loss": 0.16424354910850525 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 1.2872543334960938, + "learning_rate": 1.0262908230403824e-05, + "loss": 0.6048, + "step": 19413, + "teacher_loss": 0.5289934277534485 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.19633536040782928, + "learning_rate": 1.0260753269026366e-05, + "loss": 0.17, + "step": 19414, + "teacher_loss": 0.1671261191368103 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.3945610523223877, + "learning_rate": 1.0258598416305628e-05, + "loss": 0.2328, + "step": 19415, + "teacher_loss": 0.21487906575202942 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.7016512155532837, + "learning_rate": 1.0256443672291019e-05, + "loss": 0.2497, + "step": 19416, + "teacher_loss": 0.19947555661201477 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.2866821587085724, + "learning_rate": 1.0254289037031922e-05, + "loss": 0.2178, + "step": 19417, + "teacher_loss": 0.21019750833511353 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.3654671907424927, + "learning_rate": 1.025213451057776e-05, + "loss": 0.1846, + "step": 19418, + "teacher_loss": 0.16452905535697937 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.4609503149986267, + "learning_rate": 1.0249980092977916e-05, + "loss": 0.2438, + "step": 19419, + "teacher_loss": 0.21971558034420013 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.2799150347709656, + "learning_rate": 1.0247825784281782e-05, + "loss": 0.2355, + "step": 19420, + "teacher_loss": 0.23053419589996338 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.3768486976623535, + "learning_rate": 1.0245671584538762e-05, + "loss": 0.2151, + "step": 19421, + "teacher_loss": 0.19709941744804382 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 1.0270946025848389, + "learning_rate": 1.0243517493798234e-05, + "loss": 0.2689, + "step": 19422, + "teacher_loss": 0.18467873334884644 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.16900645196437836, + "learning_rate": 1.0241363512109588e-05, + "loss": 0.2111, + "step": 19423, + "teacher_loss": 0.2158191204071045 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.6266013383865356, + "learning_rate": 1.0239209639522213e-05, + "loss": 0.3801, + "step": 19424, + "teacher_loss": 0.35273754596710205 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.5590620636940002, + "learning_rate": 1.0237055876085487e-05, + "loss": 0.258, + "step": 19425, + "teacher_loss": 0.22455421090126038 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.6067731976509094, + "learning_rate": 1.0234902221848786e-05, + "loss": 0.357, + "step": 19426, + "teacher_loss": 0.32928046584129333 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.48606717586517334, + "learning_rate": 1.0232748676861495e-05, + "loss": 0.2588, + "step": 19427, + "teacher_loss": 0.23349490761756897 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.9790364503860474, + "learning_rate": 1.0230595241172987e-05, + "loss": 0.2545, + "step": 19428, + "teacher_loss": 0.17399568855762482 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.12621092796325684, + "learning_rate": 1.0228441914832626e-05, + "loss": 0.2106, + "step": 19429, + "teacher_loss": 0.21996408700942993 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.5764368772506714, + "learning_rate": 1.022628869788978e-05, + "loss": 0.2159, + "step": 19430, + "teacher_loss": 0.17582494020462036 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.5570341348648071, + "learning_rate": 1.0224135590393833e-05, + "loss": 0.2366, + "step": 19431, + "teacher_loss": 0.20099316537380219 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.421671599149704, + "learning_rate": 1.0221982592394134e-05, + "loss": 0.2063, + "step": 19432, + "teacher_loss": 0.18232038617134094 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.3714757263660431, + "learning_rate": 1.0219829703940047e-05, + "loss": 0.2044, + "step": 19433, + "teacher_loss": 0.18580356240272522 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.32862144708633423, + "learning_rate": 1.0217676925080933e-05, + "loss": 0.308, + "step": 19434, + "teacher_loss": 0.3056778311729431 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.22198928892612457, + "learning_rate": 1.021552425586615e-05, + "loss": 0.1868, + "step": 19435, + "teacher_loss": 0.1829097419977188 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.5531752109527588, + "learning_rate": 1.0213371696345051e-05, + "loss": 0.3392, + "step": 19436, + "teacher_loss": 0.31539642810821533 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.670462965965271, + "learning_rate": 1.021121924656699e-05, + "loss": 0.3587, + "step": 19437, + "teacher_loss": 0.3240899443626404 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.311935693025589, + "learning_rate": 1.0209066906581314e-05, + "loss": 0.1779, + "step": 19438, + "teacher_loss": 0.16302621364593506 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.2325492799282074, + "learning_rate": 1.0206914676437363e-05, + "loss": 0.2081, + "step": 19439, + "teacher_loss": 0.20534178614616394 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.5304273366928101, + "learning_rate": 1.0204762556184497e-05, + "loss": 0.2397, + "step": 19440, + "teacher_loss": 0.20741626620292664 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.3660224676132202, + "learning_rate": 1.0202610545872048e-05, + "loss": 0.1891, + "step": 19441, + "teacher_loss": 0.1694117933511734 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.7343100309371948, + "learning_rate": 1.0200458645549347e-05, + "loss": 0.3189, + "step": 19442, + "teacher_loss": 0.2727666199207306 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.39585065841674805, + "learning_rate": 1.019830685526575e-05, + "loss": 0.3638, + "step": 19443, + "teacher_loss": 0.36021149158477783 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.5108075737953186, + "learning_rate": 1.0196155175070576e-05, + "loss": 0.3285, + "step": 19444, + "teacher_loss": 0.3082049489021301 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.30642494559288025, + "learning_rate": 1.019400360501316e-05, + "loss": 0.1703, + "step": 19445, + "teacher_loss": 0.15512561798095703 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.5225207805633545, + "learning_rate": 1.0191852145142833e-05, + "loss": 0.3237, + "step": 19446, + "teacher_loss": 0.3016514182090759 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 0.3370410203933716, + "learning_rate": 1.0189700795508924e-05, + "loss": 0.2077, + "step": 19447, + "teacher_loss": 0.19327399134635925 + }, + { + "compression_loss": 0.0, + "epoch": 3.51, + "label_loss": 1.027052402496338, + "learning_rate": 1.018754955616075e-05, + "loss": 0.2772, + "step": 19448, + "teacher_loss": 0.1938624382019043 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.36173707246780396, + "learning_rate": 1.0185398427147635e-05, + "loss": 0.196, + "step": 19449, + "teacher_loss": 0.17761212587356567 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.28245627880096436, + "learning_rate": 1.0183247408518903e-05, + "loss": 0.1624, + "step": 19450, + "teacher_loss": 0.14907124638557434 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.1912381947040558, + "learning_rate": 1.0181096500323868e-05, + "loss": 0.2131, + "step": 19451, + "teacher_loss": 0.21557781100273132 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.38765954971313477, + "learning_rate": 1.0178945702611832e-05, + "loss": 0.2891, + "step": 19452, + "teacher_loss": 0.27810221910476685 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.3280712366104126, + "learning_rate": 1.0176795015432129e-05, + "loss": 0.2165, + "step": 19453, + "teacher_loss": 0.20407480001449585 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.31434187293052673, + "learning_rate": 1.0174644438834053e-05, + "loss": 0.2596, + "step": 19454, + "teacher_loss": 0.2535129487514496 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.531960666179657, + "learning_rate": 1.0172493972866904e-05, + "loss": 0.2585, + "step": 19455, + "teacher_loss": 0.22806000709533691 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.2145223617553711, + "learning_rate": 1.0170343617580005e-05, + "loss": 0.2598, + "step": 19456, + "teacher_loss": 0.2648307681083679 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.5329656600952148, + "learning_rate": 1.0168193373022644e-05, + "loss": 0.2438, + "step": 19457, + "teacher_loss": 0.2117207795381546 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.6372746825218201, + "learning_rate": 1.0166043239244119e-05, + "loss": 0.2692, + "step": 19458, + "teacher_loss": 0.22828815877437592 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.44345855712890625, + "learning_rate": 1.0163893216293733e-05, + "loss": 0.207, + "step": 19459, + "teacher_loss": 0.1807098388671875 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.5052556395530701, + "learning_rate": 1.0161743304220774e-05, + "loss": 0.3101, + "step": 19460, + "teacher_loss": 0.2884673774242401 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.25631698966026306, + "learning_rate": 1.0159593503074535e-05, + "loss": 0.1972, + "step": 19461, + "teacher_loss": 0.19068075716495514 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.3342117369174957, + "learning_rate": 1.0157443812904306e-05, + "loss": 0.1802, + "step": 19462, + "teacher_loss": 0.1631053239107132 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.5531876683235168, + "learning_rate": 1.0155294233759373e-05, + "loss": 0.291, + "step": 19463, + "teacher_loss": 0.26189544796943665 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.24584102630615234, + "learning_rate": 1.0153144765689014e-05, + "loss": 0.193, + "step": 19464, + "teacher_loss": 0.18716412782669067 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.32678771018981934, + "learning_rate": 1.0150995408742518e-05, + "loss": 0.2532, + "step": 19465, + "teacher_loss": 0.24505354464054108 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.45356231927871704, + "learning_rate": 1.014884616296916e-05, + "loss": 0.2194, + "step": 19466, + "teacher_loss": 0.19334356486797333 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.8779394626617432, + "learning_rate": 1.0146697028418207e-05, + "loss": 0.3283, + "step": 19467, + "teacher_loss": 0.26720917224884033 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.6529161334037781, + "learning_rate": 1.014454800513895e-05, + "loss": 0.2171, + "step": 19468, + "teacher_loss": 0.16865789890289307 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.5911378860473633, + "learning_rate": 1.0142399093180646e-05, + "loss": 0.2277, + "step": 19469, + "teacher_loss": 0.1873677670955658 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.261684387922287, + "learning_rate": 1.0140250292592568e-05, + "loss": 0.1864, + "step": 19470, + "teacher_loss": 0.17801953852176666 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.3821369409561157, + "learning_rate": 1.0138101603423978e-05, + "loss": 0.3003, + "step": 19471, + "teacher_loss": 0.2911791205406189 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.4049680233001709, + "learning_rate": 1.0135953025724142e-05, + "loss": 0.2797, + "step": 19472, + "teacher_loss": 0.26583564281463623 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.3070334792137146, + "learning_rate": 1.0133804559542322e-05, + "loss": 0.1655, + "step": 19473, + "teacher_loss": 0.14982284605503082 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.18226468563079834, + "learning_rate": 1.013165620492777e-05, + "loss": 0.1957, + "step": 19474, + "teacher_loss": 0.19716814160346985 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.19842079281806946, + "learning_rate": 1.0129507961929749e-05, + "loss": 0.2234, + "step": 19475, + "teacher_loss": 0.22617238759994507 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.5729424953460693, + "learning_rate": 1.012735983059751e-05, + "loss": 0.2573, + "step": 19476, + "teacher_loss": 0.22227749228477478 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.30991387367248535, + "learning_rate": 1.0125211810980292e-05, + "loss": 0.1476, + "step": 19477, + "teacher_loss": 0.12961483001708984 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.6316435933113098, + "learning_rate": 1.012306390312736e-05, + "loss": 0.3661, + "step": 19478, + "teacher_loss": 0.3365713953971863 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.6574933528900146, + "learning_rate": 1.012091610708795e-05, + "loss": 0.2795, + "step": 19479, + "teacher_loss": 0.23745989799499512 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.5261728167533875, + "learning_rate": 1.01187684229113e-05, + "loss": 0.3401, + "step": 19480, + "teacher_loss": 0.3194471299648285 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.20688362419605255, + "learning_rate": 1.0116620850646661e-05, + "loss": 0.1552, + "step": 19481, + "teacher_loss": 0.14949235320091248 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.6179096698760986, + "learning_rate": 1.0114473390343264e-05, + "loss": 0.2737, + "step": 19482, + "teacher_loss": 0.23549196124076843 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.9272891283035278, + "learning_rate": 1.011232604205034e-05, + "loss": 0.386, + "step": 19483, + "teacher_loss": 0.3259025812149048 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.2913195788860321, + "learning_rate": 1.0110178805817132e-05, + "loss": 0.1867, + "step": 19484, + "teacher_loss": 0.17502276599407196 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.3823346495628357, + "learning_rate": 1.0108031681692862e-05, + "loss": 0.2281, + "step": 19485, + "teacher_loss": 0.21092775464057922 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.6832438707351685, + "learning_rate": 1.0105884669726755e-05, + "loss": 0.3161, + "step": 19486, + "teacher_loss": 0.2752572298049927 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.33236807584762573, + "learning_rate": 1.010373776996804e-05, + "loss": 0.2819, + "step": 19487, + "teacher_loss": 0.2762737274169922 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.44646745920181274, + "learning_rate": 1.0101590982465942e-05, + "loss": 0.2059, + "step": 19488, + "teacher_loss": 0.17920741438865662 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.38612663745880127, + "learning_rate": 1.009944430726968e-05, + "loss": 0.2807, + "step": 19489, + "teacher_loss": 0.26895081996917725 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.25808462500572205, + "learning_rate": 1.0097297744428456e-05, + "loss": 0.211, + "step": 19490, + "teacher_loss": 0.2057158350944519 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.21082568168640137, + "learning_rate": 1.0095151293991506e-05, + "loss": 0.1702, + "step": 19491, + "teacher_loss": 0.16574159264564514 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.5113463401794434, + "learning_rate": 1.0093004956008026e-05, + "loss": 0.2925, + "step": 19492, + "teacher_loss": 0.26819223165512085 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.4059942364692688, + "learning_rate": 1.009085873052723e-05, + "loss": 0.189, + "step": 19493, + "teacher_loss": 0.1648443192243576 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.38825899362564087, + "learning_rate": 1.0088712617598325e-05, + "loss": 0.2052, + "step": 19494, + "teacher_loss": 0.1848527491092682 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.5189722180366516, + "learning_rate": 1.0086566617270518e-05, + "loss": 0.4149, + "step": 19495, + "teacher_loss": 0.40337663888931274 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.20231488347053528, + "learning_rate": 1.0084420729593004e-05, + "loss": 0.1949, + "step": 19496, + "teacher_loss": 0.19408470392227173 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.23983778059482574, + "learning_rate": 1.0082274954614987e-05, + "loss": 0.1574, + "step": 19497, + "teacher_loss": 0.14820244908332825 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.32351869344711304, + "learning_rate": 1.0080129292385661e-05, + "loss": 0.203, + "step": 19498, + "teacher_loss": 0.18957358598709106 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.34295618534088135, + "learning_rate": 1.0077983742954217e-05, + "loss": 0.2042, + "step": 19499, + "teacher_loss": 0.18879255652427673 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.13781006634235382, + "learning_rate": 1.0075838306369852e-05, + "loss": 0.1721, + "step": 19500, + "teacher_loss": 0.1759313941001892 + }, + { + "epoch": 3.52, + "eval_exact_match": 79.78240302743615, + "eval_f1": 87.32966072047768, + "step": 19500 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.5822824239730835, + "learning_rate": 1.0073692982681755e-05, + "loss": 0.3034, + "step": 19501, + "teacher_loss": 0.27242371439933777 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.18304210901260376, + "learning_rate": 1.0071547771939098e-05, + "loss": 0.1976, + "step": 19502, + "teacher_loss": 0.19925439357757568 + }, + { + "compression_loss": 0.0, + "epoch": 3.52, + "label_loss": 0.24840891361236572, + "learning_rate": 1.0069402674191083e-05, + "loss": 0.2145, + "step": 19503, + "teacher_loss": 0.21070721745491028 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.16617180407047272, + "learning_rate": 1.0067257689486879e-05, + "loss": 0.1785, + "step": 19504, + "teacher_loss": 0.17992311716079712 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.2808770537376404, + "learning_rate": 1.0065112817875662e-05, + "loss": 0.1952, + "step": 19505, + "teacher_loss": 0.18571394681930542 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.49665361642837524, + "learning_rate": 1.0062968059406621e-05, + "loss": 0.2884, + "step": 19506, + "teacher_loss": 0.2652703523635864 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.4683324992656708, + "learning_rate": 1.0060823414128917e-05, + "loss": 0.2236, + "step": 19507, + "teacher_loss": 0.19642239809036255 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.6576347351074219, + "learning_rate": 1.0058678882091721e-05, + "loss": 0.2549, + "step": 19508, + "teacher_loss": 0.21014100313186646 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.8172670006752014, + "learning_rate": 1.0056534463344207e-05, + "loss": 0.3044, + "step": 19509, + "teacher_loss": 0.24744324386119843 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.6028466820716858, + "learning_rate": 1.0054390157935536e-05, + "loss": 0.3047, + "step": 19510, + "teacher_loss": 0.27160191535949707 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.36131414771080017, + "learning_rate": 1.0052245965914872e-05, + "loss": 0.2061, + "step": 19511, + "teacher_loss": 0.18881818652153015 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.5852622985839844, + "learning_rate": 1.005010188733137e-05, + "loss": 0.2484, + "step": 19512, + "teacher_loss": 0.2109193354845047 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.41420358419418335, + "learning_rate": 1.0047957922234194e-05, + "loss": 0.1796, + "step": 19513, + "teacher_loss": 0.15348698198795319 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.21973851323127747, + "learning_rate": 1.0045814070672498e-05, + "loss": 0.1682, + "step": 19514, + "teacher_loss": 0.16244924068450928 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.8999911546707153, + "learning_rate": 1.0043670332695426e-05, + "loss": 0.3054, + "step": 19515, + "teacher_loss": 0.23933374881744385 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.5979779362678528, + "learning_rate": 1.0041526708352139e-05, + "loss": 0.2097, + "step": 19516, + "teacher_loss": 0.16652944684028625 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.3461306393146515, + "learning_rate": 1.0039383197691775e-05, + "loss": 0.2179, + "step": 19517, + "teacher_loss": 0.2036415934562683 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.8049126863479614, + "learning_rate": 1.003723980076348e-05, + "loss": 0.3317, + "step": 19518, + "teacher_loss": 0.2790743112564087 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.4850609302520752, + "learning_rate": 1.00350965176164e-05, + "loss": 0.2241, + "step": 19519, + "teacher_loss": 0.1950729787349701 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.3148100972175598, + "learning_rate": 1.0032953348299674e-05, + "loss": 0.1669, + "step": 19520, + "teacher_loss": 0.15047243237495422 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.6103925108909607, + "learning_rate": 1.0030810292862429e-05, + "loss": 0.2993, + "step": 19521, + "teacher_loss": 0.26469236612319946 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.6049237251281738, + "learning_rate": 1.0028667351353809e-05, + "loss": 0.2884, + "step": 19522, + "teacher_loss": 0.2531885802745819 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.4821338653564453, + "learning_rate": 1.0026524523822944e-05, + "loss": 0.2713, + "step": 19523, + "teacher_loss": 0.24789506196975708 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.7074856758117676, + "learning_rate": 1.0024381810318956e-05, + "loss": 0.3152, + "step": 19524, + "teacher_loss": 0.2716485857963562 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.45420753955841064, + "learning_rate": 1.0022239210890977e-05, + "loss": 0.2018, + "step": 19525, + "teacher_loss": 0.17370617389678955 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.42822980880737305, + "learning_rate": 1.0020096725588135e-05, + "loss": 0.258, + "step": 19526, + "teacher_loss": 0.23907725512981415 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.707569420337677, + "learning_rate": 1.0017954354459535e-05, + "loss": 0.3548, + "step": 19527, + "teacher_loss": 0.3156408667564392 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.5429368019104004, + "learning_rate": 1.001581209755431e-05, + "loss": 0.2333, + "step": 19528, + "teacher_loss": 0.19890055060386658 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.6895972490310669, + "learning_rate": 1.0013669954921572e-05, + "loss": 0.3556, + "step": 19529, + "teacher_loss": 0.3184818625450134 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.3465479612350464, + "learning_rate": 1.0011527926610425e-05, + "loss": 0.2006, + "step": 19530, + "teacher_loss": 0.18438659608364105 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.3681927025318146, + "learning_rate": 1.0009386012669995e-05, + "loss": 0.2389, + "step": 19531, + "teacher_loss": 0.22450140118598938 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.24070215225219727, + "learning_rate": 1.0007244213149377e-05, + "loss": 0.1996, + "step": 19532, + "teacher_loss": 0.1950719654560089 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.9446883201599121, + "learning_rate": 1.0005102528097679e-05, + "loss": 0.3247, + "step": 19533, + "teacher_loss": 0.2557964324951172 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.2997077405452728, + "learning_rate": 1.0002960957564004e-05, + "loss": 0.2306, + "step": 19534, + "teacher_loss": 0.2228684425354004 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.3986530601978302, + "learning_rate": 1.0000819501597454e-05, + "loss": 0.1827, + "step": 19535, + "teacher_loss": 0.15870392322540283 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.5486023426055908, + "learning_rate": 9.998678160247127e-06, + "loss": 0.2135, + "step": 19536, + "teacher_loss": 0.17630544304847717 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.445093035697937, + "learning_rate": 9.996536933562108e-06, + "loss": 0.2582, + "step": 19537, + "teacher_loss": 0.23742592334747314 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.11764411628246307, + "learning_rate": 9.994395821591501e-06, + "loss": 0.1817, + "step": 19538, + "teacher_loss": 0.18886131048202515 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.6120966672897339, + "learning_rate": 9.992254824384396e-06, + "loss": 0.5214, + "step": 19539, + "teacher_loss": 0.5113601088523865 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.1887083798646927, + "learning_rate": 9.99011394198986e-06, + "loss": 0.1641, + "step": 19540, + "teacher_loss": 0.16137921810150146 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.8668692708015442, + "learning_rate": 9.987973174457002e-06, + "loss": 0.2877, + "step": 19541, + "teacher_loss": 0.2233489602804184 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.3059224784374237, + "learning_rate": 9.98583252183489e-06, + "loss": 0.2073, + "step": 19542, + "teacher_loss": 0.19631874561309814 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.31433677673339844, + "learning_rate": 9.983691984172602e-06, + "loss": 0.2466, + "step": 19543, + "teacher_loss": 0.2390194684267044 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.23233336210250854, + "learning_rate": 9.981551561519219e-06, + "loss": 0.2949, + "step": 19544, + "teacher_loss": 0.3018597364425659 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.23078562319278717, + "learning_rate": 9.979411253923813e-06, + "loss": 0.3537, + "step": 19545, + "teacher_loss": 0.36735644936561584 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.49961578845977783, + "learning_rate": 9.977271061435451e-06, + "loss": 0.2149, + "step": 19546, + "teacher_loss": 0.18331004679203033 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.5616172552108765, + "learning_rate": 9.97513098410321e-06, + "loss": 0.2807, + "step": 19547, + "teacher_loss": 0.2495173066854477 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.48349398374557495, + "learning_rate": 9.972991021976147e-06, + "loss": 0.1909, + "step": 19548, + "teacher_loss": 0.15840110182762146 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.18089839816093445, + "learning_rate": 9.970851175103329e-06, + "loss": 0.1626, + "step": 19549, + "teacher_loss": 0.16053500771522522 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.270516037940979, + "learning_rate": 9.968711443533814e-06, + "loss": 0.1732, + "step": 19550, + "teacher_loss": 0.16240260004997253 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.4094671607017517, + "learning_rate": 9.966571827316666e-06, + "loss": 0.2032, + "step": 19551, + "teacher_loss": 0.18033233284950256 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.2510198950767517, + "learning_rate": 9.964432326500933e-06, + "loss": 0.1724, + "step": 19552, + "teacher_loss": 0.1636662483215332 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.9874939918518066, + "learning_rate": 9.96229294113566e-06, + "loss": 0.4215, + "step": 19553, + "teacher_loss": 0.3585950434207916 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.6145588159561157, + "learning_rate": 9.960153671269917e-06, + "loss": 0.2127, + "step": 19554, + "teacher_loss": 0.1680116355419159 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.49198538064956665, + "learning_rate": 9.95801451695274e-06, + "loss": 0.2809, + "step": 19555, + "teacher_loss": 0.2574813961982727 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.6815842986106873, + "learning_rate": 9.955875478233166e-06, + "loss": 0.3187, + "step": 19556, + "teacher_loss": 0.27836674451828003 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.6621488332748413, + "learning_rate": 9.95373655516025e-06, + "loss": 0.2417, + "step": 19557, + "teacher_loss": 0.1949477195739746 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.4806235432624817, + "learning_rate": 9.951597747783024e-06, + "loss": 0.4308, + "step": 19558, + "teacher_loss": 0.42531657218933105 + }, + { + "compression_loss": 0.0, + "epoch": 3.53, + "label_loss": 0.09620915353298187, + "learning_rate": 9.949459056150524e-06, + "loss": 0.1217, + "step": 19559, + "teacher_loss": 0.12455607950687408 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.4135897159576416, + "learning_rate": 9.947320480311788e-06, + "loss": 0.3702, + "step": 19560, + "teacher_loss": 0.36541837453842163 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.44637465476989746, + "learning_rate": 9.945182020315845e-06, + "loss": 0.2654, + "step": 19561, + "teacher_loss": 0.24532447755336761 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.23862197995185852, + "learning_rate": 9.943043676211718e-06, + "loss": 0.1755, + "step": 19562, + "teacher_loss": 0.1684625893831253 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.27720773220062256, + "learning_rate": 9.940905448048444e-06, + "loss": 0.1631, + "step": 19563, + "teacher_loss": 0.15041495859622955 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.886763334274292, + "learning_rate": 9.93876733587504e-06, + "loss": 0.5617, + "step": 19564, + "teacher_loss": 0.5256365537643433 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.49012044072151184, + "learning_rate": 9.936629339740519e-06, + "loss": 0.4488, + "step": 19565, + "teacher_loss": 0.44417595863342285 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.18380270898342133, + "learning_rate": 9.934491459693916e-06, + "loss": 0.1244, + "step": 19566, + "teacher_loss": 0.11774909496307373 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.5364276766777039, + "learning_rate": 9.932353695784233e-06, + "loss": 0.2726, + "step": 19567, + "teacher_loss": 0.24334004521369934 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.2974734306335449, + "learning_rate": 9.930216048060484e-06, + "loss": 0.1837, + "step": 19568, + "teacher_loss": 0.17110282182693481 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.29208338260650635, + "learning_rate": 9.928078516571683e-06, + "loss": 0.2176, + "step": 19569, + "teacher_loss": 0.20934754610061646 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.2601548433303833, + "learning_rate": 9.925941101366835e-06, + "loss": 0.1785, + "step": 19570, + "teacher_loss": 0.1694105863571167 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.37648963928222656, + "learning_rate": 9.923803802494945e-06, + "loss": 0.2679, + "step": 19571, + "teacher_loss": 0.25581812858581543 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.24660523235797882, + "learning_rate": 9.921666620005013e-06, + "loss": 0.2102, + "step": 19572, + "teacher_loss": 0.20613731443881989 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.24940378963947296, + "learning_rate": 9.91952955394604e-06, + "loss": 0.1771, + "step": 19573, + "teacher_loss": 0.16907119750976562 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 1.0295100212097168, + "learning_rate": 9.91739260436703e-06, + "loss": 0.4258, + "step": 19574, + "teacher_loss": 0.3586958646774292 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.6346731185913086, + "learning_rate": 9.915255771316957e-06, + "loss": 0.3011, + "step": 19575, + "teacher_loss": 0.26407235860824585 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.47836095094680786, + "learning_rate": 9.913119054844833e-06, + "loss": 0.2731, + "step": 19576, + "teacher_loss": 0.2502423822879791 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.3255135416984558, + "learning_rate": 9.910982454999636e-06, + "loss": 0.2904, + "step": 19577, + "teacher_loss": 0.2864677309989929 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.3129858076572418, + "learning_rate": 9.908845971830345e-06, + "loss": 0.3032, + "step": 19578, + "teacher_loss": 0.3021564483642578 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.4534338116645813, + "learning_rate": 9.906709605385963e-06, + "loss": 0.2795, + "step": 19579, + "teacher_loss": 0.26018524169921875 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.38047921657562256, + "learning_rate": 9.904573355715457e-06, + "loss": 0.2543, + "step": 19580, + "teacher_loss": 0.24026885628700256 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.22300752997398376, + "learning_rate": 9.902437222867802e-06, + "loss": 0.2371, + "step": 19581, + "teacher_loss": 0.23871418833732605 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.5068744421005249, + "learning_rate": 9.900301206891984e-06, + "loss": 0.1771, + "step": 19582, + "teacher_loss": 0.14046257734298706 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 1.2976257801055908, + "learning_rate": 9.898165307836966e-06, + "loss": 0.367, + "step": 19583, + "teacher_loss": 0.2636162340641022 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.37200456857681274, + "learning_rate": 9.896029525751721e-06, + "loss": 0.1799, + "step": 19584, + "teacher_loss": 0.15856097638607025 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.25944653153419495, + "learning_rate": 9.893893860685219e-06, + "loss": 0.2153, + "step": 19585, + "teacher_loss": 0.21040308475494385 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.7618865370750427, + "learning_rate": 9.891758312686421e-06, + "loss": 0.2938, + "step": 19586, + "teacher_loss": 0.24180731177330017 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.3785143494606018, + "learning_rate": 9.88962288180429e-06, + "loss": 0.2325, + "step": 19587, + "teacher_loss": 0.21629473567008972 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.2821401059627533, + "learning_rate": 9.887487568087782e-06, + "loss": 0.2851, + "step": 19588, + "teacher_loss": 0.28543734550476074 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.17639687657356262, + "learning_rate": 9.885352371585862e-06, + "loss": 0.1488, + "step": 19589, + "teacher_loss": 0.1457221806049347 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.35193362832069397, + "learning_rate": 9.883217292347469e-06, + "loss": 0.2283, + "step": 19590, + "teacher_loss": 0.2145354449748993 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.39796435832977295, + "learning_rate": 9.881082330421571e-06, + "loss": 0.2486, + "step": 19591, + "teacher_loss": 0.2319633960723877 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.7542493343353271, + "learning_rate": 9.878947485857104e-06, + "loss": 0.2582, + "step": 19592, + "teacher_loss": 0.20306503772735596 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.284929484128952, + "learning_rate": 9.87681275870302e-06, + "loss": 0.162, + "step": 19593, + "teacher_loss": 0.14835819602012634 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.4068587124347687, + "learning_rate": 9.874678149008254e-06, + "loss": 0.2256, + "step": 19594, + "teacher_loss": 0.20545810461044312 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.6160931587219238, + "learning_rate": 9.872543656821755e-06, + "loss": 0.3716, + "step": 19595, + "teacher_loss": 0.34442809224128723 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.8495842218399048, + "learning_rate": 9.870409282192456e-06, + "loss": 0.3121, + "step": 19596, + "teacher_loss": 0.2524319887161255 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.4769754409790039, + "learning_rate": 9.868275025169291e-06, + "loss": 0.3982, + "step": 19597, + "teacher_loss": 0.3894667625427246 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.37383565306663513, + "learning_rate": 9.866140885801198e-06, + "loss": 0.1873, + "step": 19598, + "teacher_loss": 0.16660335659980774 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.35413438081741333, + "learning_rate": 9.864006864137104e-06, + "loss": 0.2652, + "step": 19599, + "teacher_loss": 0.2553294003009796 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.6458280086517334, + "learning_rate": 9.861872960225925e-06, + "loss": 0.43, + "step": 19600, + "teacher_loss": 0.4060248136520386 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.22548463940620422, + "learning_rate": 9.859739174116606e-06, + "loss": 0.176, + "step": 19601, + "teacher_loss": 0.17048616707324982 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.5701205730438232, + "learning_rate": 9.85760550585805e-06, + "loss": 0.3207, + "step": 19602, + "teacher_loss": 0.2929496467113495 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.4363097846508026, + "learning_rate": 9.855471955499175e-06, + "loss": 0.2301, + "step": 19603, + "teacher_loss": 0.20724061131477356 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.17100924253463745, + "learning_rate": 9.853338523088917e-06, + "loss": 0.1994, + "step": 19604, + "teacher_loss": 0.20257365703582764 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.24860334396362305, + "learning_rate": 9.85120520867617e-06, + "loss": 0.2299, + "step": 19605, + "teacher_loss": 0.22783200442790985 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.6802994012832642, + "learning_rate": 9.84907201230985e-06, + "loss": 0.2452, + "step": 19606, + "teacher_loss": 0.1968328058719635 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.21299207210540771, + "learning_rate": 9.846938934038867e-06, + "loss": 0.3307, + "step": 19607, + "teacher_loss": 0.3437294363975525 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.32776594161987305, + "learning_rate": 9.844805973912126e-06, + "loss": 0.1946, + "step": 19608, + "teacher_loss": 0.1798492819070816 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.09490704536437988, + "learning_rate": 9.842673131978522e-06, + "loss": 0.1889, + "step": 19609, + "teacher_loss": 0.19938504695892334 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.3669765293598175, + "learning_rate": 9.840540408286966e-06, + "loss": 0.1617, + "step": 19610, + "teacher_loss": 0.13884076476097107 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.27929139137268066, + "learning_rate": 9.838407802886349e-06, + "loss": 0.1934, + "step": 19611, + "teacher_loss": 0.18383339047431946 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.33364367485046387, + "learning_rate": 9.836275315825563e-06, + "loss": 0.2464, + "step": 19612, + "teacher_loss": 0.23672693967819214 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 1.0206588506698608, + "learning_rate": 9.834142947153507e-06, + "loss": 0.5595, + "step": 19613, + "teacher_loss": 0.5082470178604126 + }, + { + "compression_loss": 0.0, + "epoch": 3.54, + "label_loss": 0.4007933735847473, + "learning_rate": 9.832010696919065e-06, + "loss": 0.2217, + "step": 19614, + "teacher_loss": 0.20182600617408752 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.3271321654319763, + "learning_rate": 9.829878565171123e-06, + "loss": 0.3442, + "step": 19615, + "teacher_loss": 0.34608978033065796 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.7647700309753418, + "learning_rate": 9.82774655195856e-06, + "loss": 0.285, + "step": 19616, + "teacher_loss": 0.2316952496767044 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.49964356422424316, + "learning_rate": 9.825614657330264e-06, + "loss": 0.2612, + "step": 19617, + "teacher_loss": 0.23469607532024384 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.6729662418365479, + "learning_rate": 9.823482881335113e-06, + "loss": 0.2283, + "step": 19618, + "teacher_loss": 0.17889279127120972 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.14052435755729675, + "learning_rate": 9.821351224021974e-06, + "loss": 0.1614, + "step": 19619, + "teacher_loss": 0.16371260583400726 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.3000051975250244, + "learning_rate": 9.819219685439728e-06, + "loss": 0.2022, + "step": 19620, + "teacher_loss": 0.19131392240524292 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.2325604259967804, + "learning_rate": 9.817088265637242e-06, + "loss": 0.1537, + "step": 19621, + "teacher_loss": 0.14499130845069885 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.17120827734470367, + "learning_rate": 9.81495696466338e-06, + "loss": 0.1663, + "step": 19622, + "teacher_loss": 0.165754497051239 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.6758211255073547, + "learning_rate": 9.812825782567011e-06, + "loss": 0.222, + "step": 19623, + "teacher_loss": 0.1715477555990219 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.33912909030914307, + "learning_rate": 9.810694719397003e-06, + "loss": 0.2778, + "step": 19624, + "teacher_loss": 0.2710181474685669 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.7419806718826294, + "learning_rate": 9.808563775202192e-06, + "loss": 0.3864, + "step": 19625, + "teacher_loss": 0.34690678119659424 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.43733108043670654, + "learning_rate": 9.806432950031461e-06, + "loss": 0.2267, + "step": 19626, + "teacher_loss": 0.20334112644195557 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.6312950849533081, + "learning_rate": 9.804302243933646e-06, + "loss": 0.283, + "step": 19627, + "teacher_loss": 0.24428969621658325 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.29214754700660706, + "learning_rate": 9.8021716569576e-06, + "loss": 0.2382, + "step": 19628, + "teacher_loss": 0.23222273588180542 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.516738772392273, + "learning_rate": 9.800041189152183e-06, + "loss": 0.2018, + "step": 19629, + "teacher_loss": 0.16680949926376343 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.5969705581665039, + "learning_rate": 9.797910840566224e-06, + "loss": 0.2601, + "step": 19630, + "teacher_loss": 0.22270682454109192 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.28059905767440796, + "learning_rate": 9.795780611248572e-06, + "loss": 0.2574, + "step": 19631, + "teacher_loss": 0.2547770142555237 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.8923881649971008, + "learning_rate": 9.793650501248071e-06, + "loss": 0.2567, + "step": 19632, + "teacher_loss": 0.18606463074684143 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.31804007291793823, + "learning_rate": 9.791520510613555e-06, + "loss": 0.2269, + "step": 19633, + "teacher_loss": 0.21677199006080627 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.38350895047187805, + "learning_rate": 9.789390639393856e-06, + "loss": 0.2146, + "step": 19634, + "teacher_loss": 0.1958124041557312 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.42553991079330444, + "learning_rate": 9.787260887637809e-06, + "loss": 0.1813, + "step": 19635, + "teacher_loss": 0.15418201684951782 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.40932339429855347, + "learning_rate": 9.78513125539424e-06, + "loss": 0.2379, + "step": 19636, + "teacher_loss": 0.21889640390872955 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.27775052189826965, + "learning_rate": 9.78300174271198e-06, + "loss": 0.37, + "step": 19637, + "teacher_loss": 0.3802076578140259 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.3273114264011383, + "learning_rate": 9.78087234963984e-06, + "loss": 0.1676, + "step": 19638, + "teacher_loss": 0.14987322688102722 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.2997969686985016, + "learning_rate": 9.77874307622666e-06, + "loss": 0.1805, + "step": 19639, + "teacher_loss": 0.16725599765777588 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.5749364495277405, + "learning_rate": 9.776613922521243e-06, + "loss": 0.2255, + "step": 19640, + "teacher_loss": 0.186646968126297 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.29374784231185913, + "learning_rate": 9.774484888572404e-06, + "loss": 0.2075, + "step": 19641, + "teacher_loss": 0.19790875911712646 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 1.1676479578018188, + "learning_rate": 9.772355974428962e-06, + "loss": 0.2977, + "step": 19642, + "teacher_loss": 0.20104123651981354 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.716153085231781, + "learning_rate": 9.770227180139727e-06, + "loss": 0.2459, + "step": 19643, + "teacher_loss": 0.19365613162517548 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.8064671754837036, + "learning_rate": 9.768098505753498e-06, + "loss": 0.2993, + "step": 19644, + "teacher_loss": 0.24296057224273682 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.5965031385421753, + "learning_rate": 9.765969951319088e-06, + "loss": 0.2498, + "step": 19645, + "teacher_loss": 0.21131715178489685 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.4783465266227722, + "learning_rate": 9.763841516885293e-06, + "loss": 0.2606, + "step": 19646, + "teacher_loss": 0.23639436066150665 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.4817565381526947, + "learning_rate": 9.761713202500911e-06, + "loss": 0.2585, + "step": 19647, + "teacher_loss": 0.2337372750043869 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.29709237813949585, + "learning_rate": 9.759585008214745e-06, + "loss": 0.1979, + "step": 19648, + "teacher_loss": 0.18688073754310608 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.4689497947692871, + "learning_rate": 9.757456934075585e-06, + "loss": 0.182, + "step": 19649, + "teacher_loss": 0.1501690149307251 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.32565292716026306, + "learning_rate": 9.755328980132211e-06, + "loss": 0.2267, + "step": 19650, + "teacher_loss": 0.2156551331281662 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.49341973662376404, + "learning_rate": 9.753201146433422e-06, + "loss": 0.2323, + "step": 19651, + "teacher_loss": 0.2032516598701477 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.5770326852798462, + "learning_rate": 9.751073433028006e-06, + "loss": 0.3138, + "step": 19652, + "teacher_loss": 0.28451165556907654 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.14524126052856445, + "learning_rate": 9.74894583996473e-06, + "loss": 0.2479, + "step": 19653, + "teacher_loss": 0.2593310475349426 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.49560484290122986, + "learning_rate": 9.746818367292392e-06, + "loss": 0.2796, + "step": 19654, + "teacher_loss": 0.25560450553894043 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.7056359052658081, + "learning_rate": 9.744691015059755e-06, + "loss": 0.2445, + "step": 19655, + "teacher_loss": 0.19325533509254456 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.6361950635910034, + "learning_rate": 9.742563783315596e-06, + "loss": 0.3001, + "step": 19656, + "teacher_loss": 0.26275214552879333 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.2903074026107788, + "learning_rate": 9.740436672108686e-06, + "loss": 0.172, + "step": 19657, + "teacher_loss": 0.15886715054512024 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.687466561794281, + "learning_rate": 9.738309681487794e-06, + "loss": 0.2652, + "step": 19658, + "teacher_loss": 0.21822988986968994 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.3813740015029907, + "learning_rate": 9.736182811501688e-06, + "loss": 0.2624, + "step": 19659, + "teacher_loss": 0.2491583675146103 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.5326455235481262, + "learning_rate": 9.734056062199124e-06, + "loss": 0.2689, + "step": 19660, + "teacher_loss": 0.23958337306976318 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.20033520460128784, + "learning_rate": 9.731929433628871e-06, + "loss": 0.2578, + "step": 19661, + "teacher_loss": 0.2641976773738861 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.4973335266113281, + "learning_rate": 9.729802925839683e-06, + "loss": 0.2825, + "step": 19662, + "teacher_loss": 0.2585833668708801 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.7444344162940979, + "learning_rate": 9.727676538880306e-06, + "loss": 0.298, + "step": 19663, + "teacher_loss": 0.24838249385356903 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.26262909173965454, + "learning_rate": 9.725550272799506e-06, + "loss": 0.1571, + "step": 19664, + "teacher_loss": 0.14540642499923706 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.5884849429130554, + "learning_rate": 9.723424127646022e-06, + "loss": 0.1952, + "step": 19665, + "teacher_loss": 0.15154540538787842 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.5201140642166138, + "learning_rate": 9.721298103468599e-06, + "loss": 0.2697, + "step": 19666, + "teacher_loss": 0.2418575882911682 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.23165124654769897, + "learning_rate": 9.719172200315989e-06, + "loss": 0.162, + "step": 19667, + "teacher_loss": 0.15423768758773804 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.659850001335144, + "learning_rate": 9.717046418236927e-06, + "loss": 0.3562, + "step": 19668, + "teacher_loss": 0.3224598467350006 + }, + { + "compression_loss": 0.0, + "epoch": 3.55, + "label_loss": 0.4666239023208618, + "learning_rate": 9.714920757280147e-06, + "loss": 0.3202, + "step": 19669, + "teacher_loss": 0.3039490878582001 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.28869518637657166, + "learning_rate": 9.712795217494394e-06, + "loss": 0.1846, + "step": 19670, + "teacher_loss": 0.1730433702468872 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.34344086050987244, + "learning_rate": 9.710669798928395e-06, + "loss": 0.2003, + "step": 19671, + "teacher_loss": 0.18440741300582886 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.8189817070960999, + "learning_rate": 9.708544501630874e-06, + "loss": 0.371, + "step": 19672, + "teacher_loss": 0.3212584853172302 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.8164449334144592, + "learning_rate": 9.706419325650566e-06, + "loss": 0.2908, + "step": 19673, + "teacher_loss": 0.23241671919822693 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.347476601600647, + "learning_rate": 9.704294271036196e-06, + "loss": 0.2873, + "step": 19674, + "teacher_loss": 0.28063392639160156 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.4515160322189331, + "learning_rate": 9.70216933783647e-06, + "loss": 0.2043, + "step": 19675, + "teacher_loss": 0.1768435835838318 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.16849662363529205, + "learning_rate": 9.700044526100126e-06, + "loss": 0.1706, + "step": 19676, + "teacher_loss": 0.1708613932132721 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.542802095413208, + "learning_rate": 9.697919835875873e-06, + "loss": 0.2467, + "step": 19677, + "teacher_loss": 0.2138374298810959 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.6454569101333618, + "learning_rate": 9.69579526721242e-06, + "loss": 0.3479, + "step": 19678, + "teacher_loss": 0.31488853693008423 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.2728041112422943, + "learning_rate": 9.693670820158474e-06, + "loss": 0.1694, + "step": 19679, + "teacher_loss": 0.15791793167591095 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.3199935853481293, + "learning_rate": 9.691546494762749e-06, + "loss": 0.1908, + "step": 19680, + "teacher_loss": 0.17641231417655945 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.3441593050956726, + "learning_rate": 9.689422291073949e-06, + "loss": 0.3505, + "step": 19681, + "teacher_loss": 0.351242333650589 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.505041241645813, + "learning_rate": 9.68729820914077e-06, + "loss": 0.2512, + "step": 19682, + "teacher_loss": 0.22300291061401367 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.22614985704421997, + "learning_rate": 9.685174249011919e-06, + "loss": 0.173, + "step": 19683, + "teacher_loss": 0.16712301969528198 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.5780359506607056, + "learning_rate": 9.683050410736087e-06, + "loss": 0.289, + "step": 19684, + "teacher_loss": 0.25691157579421997 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.3680039942264557, + "learning_rate": 9.680926694361966e-06, + "loss": 0.2006, + "step": 19685, + "teacher_loss": 0.18197846412658691 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.30155012011528015, + "learning_rate": 9.67880309993825e-06, + "loss": 0.1846, + "step": 19686, + "teacher_loss": 0.17155423760414124 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.282605916261673, + "learning_rate": 9.676679627513628e-06, + "loss": 0.234, + "step": 19687, + "teacher_loss": 0.22864636778831482 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.4235907196998596, + "learning_rate": 9.674556277136775e-06, + "loss": 0.24, + "step": 19688, + "teacher_loss": 0.21956247091293335 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.5603359341621399, + "learning_rate": 9.672433048856387e-06, + "loss": 0.1933, + "step": 19689, + "teacher_loss": 0.15255481004714966 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.41177940368652344, + "learning_rate": 9.670309942721134e-06, + "loss": 0.2663, + "step": 19690, + "teacher_loss": 0.25018393993377686 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.2997133433818817, + "learning_rate": 9.668186958779692e-06, + "loss": 0.2282, + "step": 19691, + "teacher_loss": 0.22028802335262299 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.25963905453681946, + "learning_rate": 9.666064097080742e-06, + "loss": 0.2026, + "step": 19692, + "teacher_loss": 0.1962304413318634 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.369697630405426, + "learning_rate": 9.663941357672948e-06, + "loss": 0.2502, + "step": 19693, + "teacher_loss": 0.23697349429130554 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.45318636298179626, + "learning_rate": 9.661818740604977e-06, + "loss": 0.2049, + "step": 19694, + "teacher_loss": 0.17726637423038483 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.5361760854721069, + "learning_rate": 9.659696245925502e-06, + "loss": 0.2746, + "step": 19695, + "teacher_loss": 0.24550312757492065 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.32205134630203247, + "learning_rate": 9.65757387368318e-06, + "loss": 0.2276, + "step": 19696, + "teacher_loss": 0.21709540486335754 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.32642316818237305, + "learning_rate": 9.655451623926668e-06, + "loss": 0.2563, + "step": 19697, + "teacher_loss": 0.2485392987728119 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.23751585185527802, + "learning_rate": 9.65332949670463e-06, + "loss": 0.1956, + "step": 19698, + "teacher_loss": 0.19098608195781708 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.1672850251197815, + "learning_rate": 9.651207492065723e-06, + "loss": 0.1519, + "step": 19699, + "teacher_loss": 0.15018382668495178 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.19994695484638214, + "learning_rate": 9.649085610058585e-06, + "loss": 0.1768, + "step": 19700, + "teacher_loss": 0.1741989105939865 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.37423282861709595, + "learning_rate": 9.646963850731865e-06, + "loss": 0.2486, + "step": 19701, + "teacher_loss": 0.23466435074806213 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.33560335636138916, + "learning_rate": 9.644842214134225e-06, + "loss": 0.3181, + "step": 19702, + "teacher_loss": 0.31615251302719116 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.17105823755264282, + "learning_rate": 9.642720700314294e-06, + "loss": 0.238, + "step": 19703, + "teacher_loss": 0.2453921139240265 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.24075853824615479, + "learning_rate": 9.640599309320713e-06, + "loss": 0.181, + "step": 19704, + "teacher_loss": 0.17439356446266174 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.5519812107086182, + "learning_rate": 9.638478041202124e-06, + "loss": 0.3002, + "step": 19705, + "teacher_loss": 0.27227842807769775 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.5329298973083496, + "learning_rate": 9.636356896007158e-06, + "loss": 0.4112, + "step": 19706, + "teacher_loss": 0.3976333737373352 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.7675859928131104, + "learning_rate": 9.634235873784446e-06, + "loss": 0.3045, + "step": 19707, + "teacher_loss": 0.25303101539611816 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.4073547124862671, + "learning_rate": 9.63211497458262e-06, + "loss": 0.2117, + "step": 19708, + "teacher_loss": 0.18997898697853088 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.6389116048812866, + "learning_rate": 9.629994198450305e-06, + "loss": 0.2748, + "step": 19709, + "teacher_loss": 0.23433153331279755 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.3859766721725464, + "learning_rate": 9.627873545436117e-06, + "loss": 0.2466, + "step": 19710, + "teacher_loss": 0.23110729455947876 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.26664724946022034, + "learning_rate": 9.625753015588688e-06, + "loss": 0.1996, + "step": 19711, + "teacher_loss": 0.19218634068965912 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.3008532226085663, + "learning_rate": 9.623632608956632e-06, + "loss": 0.298, + "step": 19712, + "teacher_loss": 0.29765942692756653 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.63437819480896, + "learning_rate": 9.62151232558855e-06, + "loss": 0.2343, + "step": 19713, + "teacher_loss": 0.18979284167289734 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.1864594668149948, + "learning_rate": 9.619392165533077e-06, + "loss": 0.2025, + "step": 19714, + "teacher_loss": 0.20422762632369995 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.35578083992004395, + "learning_rate": 9.617272128838806e-06, + "loss": 0.2375, + "step": 19715, + "teacher_loss": 0.22437231242656708 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.21124783158302307, + "learning_rate": 9.615152215554341e-06, + "loss": 0.1833, + "step": 19716, + "teacher_loss": 0.18023629486560822 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.3528038263320923, + "learning_rate": 9.613032425728297e-06, + "loss": 0.1581, + "step": 19717, + "teacher_loss": 0.13650211691856384 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.3676678538322449, + "learning_rate": 9.610912759409269e-06, + "loss": 0.2048, + "step": 19718, + "teacher_loss": 0.18675173819065094 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.5473875999450684, + "learning_rate": 9.608793216645852e-06, + "loss": 0.3036, + "step": 19719, + "teacher_loss": 0.2765269875526428 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.4059361219406128, + "learning_rate": 9.60667379748664e-06, + "loss": 0.2038, + "step": 19720, + "teacher_loss": 0.18134932219982147 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.22240974009037018, + "learning_rate": 9.604554501980231e-06, + "loss": 0.1592, + "step": 19721, + "teacher_loss": 0.15213757753372192 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.5658891797065735, + "learning_rate": 9.602435330175215e-06, + "loss": 0.2799, + "step": 19722, + "teacher_loss": 0.24810951948165894 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.3812227249145508, + "learning_rate": 9.600316282120165e-06, + "loss": 0.1969, + "step": 19723, + "teacher_loss": 0.1763743758201599 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.4620589017868042, + "learning_rate": 9.598197357863681e-06, + "loss": 0.1919, + "step": 19724, + "teacher_loss": 0.161843404173851 + }, + { + "compression_loss": 0.0, + "epoch": 3.56, + "label_loss": 0.16642612218856812, + "learning_rate": 9.596078557454334e-06, + "loss": 0.1813, + "step": 19725, + "teacher_loss": 0.18297290802001953 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.5155349969863892, + "learning_rate": 9.593959880940699e-06, + "loss": 0.2287, + "step": 19726, + "teacher_loss": 0.19686922430992126 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.8422822952270508, + "learning_rate": 9.591841328371364e-06, + "loss": 0.2554, + "step": 19727, + "teacher_loss": 0.1902158558368683 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.42126667499542236, + "learning_rate": 9.589722899794888e-06, + "loss": 0.1773, + "step": 19728, + "teacher_loss": 0.1502058207988739 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.31620368361473083, + "learning_rate": 9.587604595259844e-06, + "loss": 0.2388, + "step": 19729, + "teacher_loss": 0.23018312454223633 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.31127387285232544, + "learning_rate": 9.585486414814804e-06, + "loss": 0.2395, + "step": 19730, + "teacher_loss": 0.23148967325687408 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.15470194816589355, + "learning_rate": 9.583368358508322e-06, + "loss": 0.1475, + "step": 19731, + "teacher_loss": 0.14666257798671722 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.9790284633636475, + "learning_rate": 9.581250426388966e-06, + "loss": 0.2884, + "step": 19732, + "teacher_loss": 0.21169723570346832 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.7181142568588257, + "learning_rate": 9.579132618505291e-06, + "loss": 0.3231, + "step": 19733, + "teacher_loss": 0.27922362089157104 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.3325485587120056, + "learning_rate": 9.577014934905854e-06, + "loss": 0.2193, + "step": 19734, + "teacher_loss": 0.20668581128120422 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.5428762435913086, + "learning_rate": 9.574897375639202e-06, + "loss": 0.2741, + "step": 19735, + "teacher_loss": 0.2442161738872528 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.31958791613578796, + "learning_rate": 9.572779940753894e-06, + "loss": 0.2593, + "step": 19736, + "teacher_loss": 0.2525697946548462 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.9320069551467896, + "learning_rate": 9.57066263029847e-06, + "loss": 0.2562, + "step": 19737, + "teacher_loss": 0.18114247918128967 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.33093491196632385, + "learning_rate": 9.568545444321464e-06, + "loss": 0.1872, + "step": 19738, + "teacher_loss": 0.17118746042251587 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.34154704213142395, + "learning_rate": 9.566428382871439e-06, + "loss": 0.1772, + "step": 19739, + "teacher_loss": 0.1589348316192627 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.57374107837677, + "learning_rate": 9.564311445996914e-06, + "loss": 0.207, + "step": 19740, + "teacher_loss": 0.16623975336551666 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.2143765538930893, + "learning_rate": 9.562194633746433e-06, + "loss": 0.2155, + "step": 19741, + "teacher_loss": 0.215635746717453 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.642687201499939, + "learning_rate": 9.56007794616852e-06, + "loss": 0.2673, + "step": 19742, + "teacher_loss": 0.2256081998348236 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.4992945194244385, + "learning_rate": 9.557961383311715e-06, + "loss": 0.2359, + "step": 19743, + "teacher_loss": 0.20667928457260132 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.3154093623161316, + "learning_rate": 9.555844945224538e-06, + "loss": 0.2369, + "step": 19744, + "teacher_loss": 0.22817052900791168 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.7092342376708984, + "learning_rate": 9.553728631955511e-06, + "loss": 0.3649, + "step": 19745, + "teacher_loss": 0.3266732096672058 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.22606298327445984, + "learning_rate": 9.551612443553163e-06, + "loss": 0.1477, + "step": 19746, + "teacher_loss": 0.13901641964912415 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.29060009121894836, + "learning_rate": 9.549496380066009e-06, + "loss": 0.2204, + "step": 19747, + "teacher_loss": 0.2125692069530487 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.470231294631958, + "learning_rate": 9.54738044154255e-06, + "loss": 0.2281, + "step": 19748, + "teacher_loss": 0.2012127786874771 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.7771170139312744, + "learning_rate": 9.545264628031323e-06, + "loss": 0.4027, + "step": 19749, + "teacher_loss": 0.36106836795806885 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.24146516621112823, + "learning_rate": 9.543148939580817e-06, + "loss": 0.1963, + "step": 19750, + "teacher_loss": 0.1912476122379303 + }, + { + "epoch": 3.57, + "eval_exact_match": 79.80132450331126, + "eval_f1": 87.25628193867762, + "step": 19750 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.22695963084697723, + "learning_rate": 9.54103337623954e-06, + "loss": 0.1522, + "step": 19751, + "teacher_loss": 0.1438606083393097 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.41737300157546997, + "learning_rate": 9.538917938056012e-06, + "loss": 0.2016, + "step": 19752, + "teacher_loss": 0.17765183746814728 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.3666348159313202, + "learning_rate": 9.536802625078718e-06, + "loss": 0.222, + "step": 19753, + "teacher_loss": 0.2059706300497055 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.41892117261886597, + "learning_rate": 9.534687437356157e-06, + "loss": 0.1816, + "step": 19754, + "teacher_loss": 0.15522420406341553 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.7090497016906738, + "learning_rate": 9.53257237493683e-06, + "loss": 0.3244, + "step": 19755, + "teacher_loss": 0.2816123068332672 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.7846797704696655, + "learning_rate": 9.530457437869227e-06, + "loss": 0.2401, + "step": 19756, + "teacher_loss": 0.17958106100559235 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.14173713326454163, + "learning_rate": 9.528342626201834e-06, + "loss": 0.1678, + "step": 19757, + "teacher_loss": 0.1707158386707306 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.270608127117157, + "learning_rate": 9.526227939983142e-06, + "loss": 0.1714, + "step": 19758, + "teacher_loss": 0.1603391468524933 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.36473992466926575, + "learning_rate": 9.524113379261631e-06, + "loss": 0.1827, + "step": 19759, + "teacher_loss": 0.16248352825641632 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.2568415403366089, + "learning_rate": 9.521998944085788e-06, + "loss": 0.2071, + "step": 19760, + "teacher_loss": 0.2015584409236908 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.245318204164505, + "learning_rate": 9.519884634504074e-06, + "loss": 0.1875, + "step": 19761, + "teacher_loss": 0.1810951828956604 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.4073965549468994, + "learning_rate": 9.517770450564984e-06, + "loss": 0.2526, + "step": 19762, + "teacher_loss": 0.2353518307209015 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.8636690378189087, + "learning_rate": 9.515656392316981e-06, + "loss": 0.3351, + "step": 19763, + "teacher_loss": 0.27636533975601196 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.583938717842102, + "learning_rate": 9.513542459808528e-06, + "loss": 0.2299, + "step": 19764, + "teacher_loss": 0.19061490893363953 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.6297131776809692, + "learning_rate": 9.511428653088101e-06, + "loss": 0.1918, + "step": 19765, + "teacher_loss": 0.14317730069160461 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.13741648197174072, + "learning_rate": 9.50931497220416e-06, + "loss": 0.1451, + "step": 19766, + "teacher_loss": 0.1459297090768814 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.3024732768535614, + "learning_rate": 9.507201417205162e-06, + "loss": 0.1995, + "step": 19767, + "teacher_loss": 0.18808643519878387 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.3508206605911255, + "learning_rate": 9.50508798813957e-06, + "loss": 0.221, + "step": 19768, + "teacher_loss": 0.20655420422554016 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.4434640407562256, + "learning_rate": 9.502974685055835e-06, + "loss": 0.2214, + "step": 19769, + "teacher_loss": 0.19676610827445984 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.21700438857078552, + "learning_rate": 9.500861508002407e-06, + "loss": 0.2402, + "step": 19770, + "teacher_loss": 0.2427355945110321 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.28291788697242737, + "learning_rate": 9.49874845702774e-06, + "loss": 0.2072, + "step": 19771, + "teacher_loss": 0.19881662726402283 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.34595683217048645, + "learning_rate": 9.496635532180281e-06, + "loss": 0.1931, + "step": 19772, + "teacher_loss": 0.1761069893836975 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.45131373405456543, + "learning_rate": 9.494522733508459e-06, + "loss": 0.2085, + "step": 19773, + "teacher_loss": 0.1815440058708191 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.4023161828517914, + "learning_rate": 9.492410061060731e-06, + "loss": 0.2319, + "step": 19774, + "teacher_loss": 0.21292363107204437 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.2056112289428711, + "learning_rate": 9.490297514885533e-06, + "loss": 0.1888, + "step": 19775, + "teacher_loss": 0.1868990957736969 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.5489335656166077, + "learning_rate": 9.488185095031283e-06, + "loss": 0.2291, + "step": 19776, + "teacher_loss": 0.19359859824180603 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.432267427444458, + "learning_rate": 9.486072801546433e-06, + "loss": 0.353, + "step": 19777, + "teacher_loss": 0.3442351818084717 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.6630464196205139, + "learning_rate": 9.483960634479399e-06, + "loss": 0.2625, + "step": 19778, + "teacher_loss": 0.2180473506450653 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.36091816425323486, + "learning_rate": 9.481848593878605e-06, + "loss": 0.2313, + "step": 19779, + "teacher_loss": 0.21692229807376862 + }, + { + "compression_loss": 0.0, + "epoch": 3.57, + "label_loss": 0.16834931075572968, + "learning_rate": 9.479736679792484e-06, + "loss": 0.1688, + "step": 19780, + "teacher_loss": 0.1688673347234726 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.32525840401649475, + "learning_rate": 9.47762489226945e-06, + "loss": 0.3969, + "step": 19781, + "teacher_loss": 0.40490737557411194 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.35260459780693054, + "learning_rate": 9.475513231357917e-06, + "loss": 0.236, + "step": 19782, + "teacher_loss": 0.22307217121124268 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.4594229459762573, + "learning_rate": 9.4734016971063e-06, + "loss": 0.2273, + "step": 19783, + "teacher_loss": 0.20146408677101135 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.2580691874027252, + "learning_rate": 9.471290289563019e-06, + "loss": 0.2617, + "step": 19784, + "teacher_loss": 0.26212555170059204 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.14252987504005432, + "learning_rate": 9.469179008776478e-06, + "loss": 0.1776, + "step": 19785, + "teacher_loss": 0.18147867918014526 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.13024646043777466, + "learning_rate": 9.46706785479507e-06, + "loss": 0.1852, + "step": 19786, + "teacher_loss": 0.1913457214832306 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.5175319314002991, + "learning_rate": 9.464956827667216e-06, + "loss": 0.2593, + "step": 19787, + "teacher_loss": 0.23060579597949982 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.4064214825630188, + "learning_rate": 9.462845927441304e-06, + "loss": 0.1978, + "step": 19788, + "teacher_loss": 0.17465084791183472 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.38115522265434265, + "learning_rate": 9.46073515416573e-06, + "loss": 0.2661, + "step": 19789, + "teacher_loss": 0.2533676326274872 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.3713221848011017, + "learning_rate": 9.458624507888897e-06, + "loss": 0.2685, + "step": 19790, + "teacher_loss": 0.2570367455482483 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.4722519814968109, + "learning_rate": 9.45651398865919e-06, + "loss": 0.2489, + "step": 19791, + "teacher_loss": 0.2241356074810028 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.7023818492889404, + "learning_rate": 9.454403596524991e-06, + "loss": 0.4036, + "step": 19792, + "teacher_loss": 0.3704553246498108 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.7064534425735474, + "learning_rate": 9.452293331534696e-06, + "loss": 0.2968, + "step": 19793, + "teacher_loss": 0.2512907385826111 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.5741615891456604, + "learning_rate": 9.450183193736682e-06, + "loss": 0.2205, + "step": 19794, + "teacher_loss": 0.18122228980064392 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.6621631383895874, + "learning_rate": 9.448073183179326e-06, + "loss": 0.213, + "step": 19795, + "teacher_loss": 0.1630450189113617 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.2787085771560669, + "learning_rate": 9.445963299911007e-06, + "loss": 0.1785, + "step": 19796, + "teacher_loss": 0.16735535860061646 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.4371373951435089, + "learning_rate": 9.443853543980101e-06, + "loss": 0.2717, + "step": 19797, + "teacher_loss": 0.2533169984817505 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.1518123596906662, + "learning_rate": 9.441743915434967e-06, + "loss": 0.1826, + "step": 19798, + "teacher_loss": 0.1860014796257019 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.7233725786209106, + "learning_rate": 9.439634414323987e-06, + "loss": 0.2851, + "step": 19799, + "teacher_loss": 0.23637276887893677 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.4535370469093323, + "learning_rate": 9.43752504069552e-06, + "loss": 0.2725, + "step": 19800, + "teacher_loss": 0.25242918729782104 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.24376991391181946, + "learning_rate": 9.435415794597919e-06, + "loss": 0.2056, + "step": 19801, + "teacher_loss": 0.20139597356319427 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.8627008199691772, + "learning_rate": 9.433306676079562e-06, + "loss": 0.372, + "step": 19802, + "teacher_loss": 0.31751590967178345 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.3033873736858368, + "learning_rate": 9.431197685188785e-06, + "loss": 0.2309, + "step": 19803, + "teacher_loss": 0.22280624508857727 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.36360687017440796, + "learning_rate": 9.429088821973953e-06, + "loss": 0.2498, + "step": 19804, + "teacher_loss": 0.2371712028980255 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.5851638317108154, + "learning_rate": 9.426980086483407e-06, + "loss": 0.2976, + "step": 19805, + "teacher_loss": 0.2656567096710205 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.25133275985717773, + "learning_rate": 9.424871478765503e-06, + "loss": 0.1911, + "step": 19806, + "teacher_loss": 0.18446116149425507 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.2726135849952698, + "learning_rate": 9.422762998868579e-06, + "loss": 0.2081, + "step": 19807, + "teacher_loss": 0.2009601891040802 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.5450992584228516, + "learning_rate": 9.420654646840974e-06, + "loss": 0.3117, + "step": 19808, + "teacher_loss": 0.2857212424278259 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.42858901619911194, + "learning_rate": 9.418546422731037e-06, + "loss": 0.2175, + "step": 19809, + "teacher_loss": 0.19407512247562408 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.542395830154419, + "learning_rate": 9.4164383265871e-06, + "loss": 0.2459, + "step": 19810, + "teacher_loss": 0.2129519134759903 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.487173855304718, + "learning_rate": 9.41433035845748e-06, + "loss": 0.2589, + "step": 19811, + "teacher_loss": 0.23353135585784912 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.27700042724609375, + "learning_rate": 9.412222518390526e-06, + "loss": 0.2371, + "step": 19812, + "teacher_loss": 0.23265469074249268 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.4004698097705841, + "learning_rate": 9.410114806434555e-06, + "loss": 0.2487, + "step": 19813, + "teacher_loss": 0.23188698291778564 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.6729552149772644, + "learning_rate": 9.408007222637892e-06, + "loss": 0.2735, + "step": 19814, + "teacher_loss": 0.22916674613952637 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.3781355619430542, + "learning_rate": 9.405899767048856e-06, + "loss": 0.2254, + "step": 19815, + "teacher_loss": 0.20846641063690186 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.17719748616218567, + "learning_rate": 9.403792439715768e-06, + "loss": 0.1645, + "step": 19816, + "teacher_loss": 0.16308313608169556 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.517128050327301, + "learning_rate": 9.40168524068694e-06, + "loss": 0.2487, + "step": 19817, + "teacher_loss": 0.21883654594421387 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.4026499390602112, + "learning_rate": 9.399578170010685e-06, + "loss": 0.2319, + "step": 19818, + "teacher_loss": 0.21290743350982666 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.5714460015296936, + "learning_rate": 9.397471227735313e-06, + "loss": 0.2312, + "step": 19819, + "teacher_loss": 0.1934167593717575 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.8639388084411621, + "learning_rate": 9.395364413909126e-06, + "loss": 0.2548, + "step": 19820, + "teacher_loss": 0.1871698796749115 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.16402670741081238, + "learning_rate": 9.393257728580432e-06, + "loss": 0.2029, + "step": 19821, + "teacher_loss": 0.20721864700317383 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.4520057141780853, + "learning_rate": 9.391151171797531e-06, + "loss": 0.232, + "step": 19822, + "teacher_loss": 0.2075609564781189 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.3670817017555237, + "learning_rate": 9.389044743608716e-06, + "loss": 0.1519, + "step": 19823, + "teacher_loss": 0.12797099351882935 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.950454831123352, + "learning_rate": 9.386938444062275e-06, + "loss": 0.3227, + "step": 19824, + "teacher_loss": 0.25294923782348633 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.2584499418735504, + "learning_rate": 9.384832273206514e-06, + "loss": 0.2518, + "step": 19825, + "teacher_loss": 0.2510247230529785 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.5860365629196167, + "learning_rate": 9.382726231089714e-06, + "loss": 0.4688, + "step": 19826, + "teacher_loss": 0.45581158995628357 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.5522368550300598, + "learning_rate": 9.380620317760156e-06, + "loss": 0.2177, + "step": 19827, + "teacher_loss": 0.1804906129837036 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.5885094404220581, + "learning_rate": 9.378514533266131e-06, + "loss": 0.2393, + "step": 19828, + "teacher_loss": 0.20053012669086456 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.43129289150238037, + "learning_rate": 9.37640887765591e-06, + "loss": 0.2453, + "step": 19829, + "teacher_loss": 0.22465723752975464 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.09199099242687225, + "learning_rate": 9.374303350977772e-06, + "loss": 0.1513, + "step": 19830, + "teacher_loss": 0.1579378843307495 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.2707913815975189, + "learning_rate": 9.372197953279995e-06, + "loss": 0.1628, + "step": 19831, + "teacher_loss": 0.15074864029884338 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.2685883343219757, + "learning_rate": 9.370092684610844e-06, + "loss": 0.2421, + "step": 19832, + "teacher_loss": 0.23920132219791412 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 1.0313339233398438, + "learning_rate": 9.367987545018588e-06, + "loss": 0.2631, + "step": 19833, + "teacher_loss": 0.17778009176254272 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.4770818054676056, + "learning_rate": 9.365882534551494e-06, + "loss": 0.245, + "step": 19834, + "teacher_loss": 0.21923308074474335 + }, + { + "compression_loss": 0.0, + "epoch": 3.58, + "label_loss": 0.26384660601615906, + "learning_rate": 9.363777653257823e-06, + "loss": 0.1777, + "step": 19835, + "teacher_loss": 0.16809137165546417 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.2815355956554413, + "learning_rate": 9.361672901185824e-06, + "loss": 0.2293, + "step": 19836, + "teacher_loss": 0.22352814674377441 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.7137065529823303, + "learning_rate": 9.35956827838377e-06, + "loss": 0.3584, + "step": 19837, + "teacher_loss": 0.31895363330841064 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.26560086011886597, + "learning_rate": 9.357463784899901e-06, + "loss": 0.228, + "step": 19838, + "teacher_loss": 0.22382019460201263 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.5089600086212158, + "learning_rate": 9.35535942078247e-06, + "loss": 0.2875, + "step": 19839, + "teacher_loss": 0.26288866996765137 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.49309974908828735, + "learning_rate": 9.353255186079722e-06, + "loss": 0.2206, + "step": 19840, + "teacher_loss": 0.1902964562177658 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.51983243227005, + "learning_rate": 9.351151080839908e-06, + "loss": 0.3306, + "step": 19841, + "teacher_loss": 0.3095444440841675 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.49582162499427795, + "learning_rate": 9.349047105111258e-06, + "loss": 0.2716, + "step": 19842, + "teacher_loss": 0.24669964611530304 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.1670864373445511, + "learning_rate": 9.346943258942019e-06, + "loss": 0.1483, + "step": 19843, + "teacher_loss": 0.14618347585201263 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.21581795811653137, + "learning_rate": 9.34483954238042e-06, + "loss": 0.155, + "step": 19844, + "teacher_loss": 0.14826829731464386 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.2758815884590149, + "learning_rate": 9.3427359554747e-06, + "loss": 0.2416, + "step": 19845, + "teacher_loss": 0.2378234565258026 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.1983284056186676, + "learning_rate": 9.340632498273073e-06, + "loss": 0.1586, + "step": 19846, + "teacher_loss": 0.154202401638031 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.31562134623527527, + "learning_rate": 9.338529170823787e-06, + "loss": 0.3529, + "step": 19847, + "teacher_loss": 0.35700684785842896 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.4613686203956604, + "learning_rate": 9.336425973175048e-06, + "loss": 0.3214, + "step": 19848, + "teacher_loss": 0.3058563768863678 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.22698768973350525, + "learning_rate": 9.334322905375077e-06, + "loss": 0.1709, + "step": 19849, + "teacher_loss": 0.16466376185417175 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.16260185837745667, + "learning_rate": 9.332219967472102e-06, + "loss": 0.1651, + "step": 19850, + "teacher_loss": 0.16540908813476562 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.10824403166770935, + "learning_rate": 9.33011715951433e-06, + "loss": 0.1614, + "step": 19851, + "teacher_loss": 0.16731780767440796 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.64410799741745, + "learning_rate": 9.328014481549965e-06, + "loss": 0.2314, + "step": 19852, + "teacher_loss": 0.18554972112178802 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.3166159689426422, + "learning_rate": 9.325911933627228e-06, + "loss": 0.184, + "step": 19853, + "teacher_loss": 0.16922476887702942 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.3437650203704834, + "learning_rate": 9.32380951579432e-06, + "loss": 0.3179, + "step": 19854, + "teacher_loss": 0.3149966299533844 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.6198545694351196, + "learning_rate": 9.321707228099437e-06, + "loss": 0.6113, + "step": 19855, + "teacher_loss": 0.61036217212677 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.2969365417957306, + "learning_rate": 9.319605070590786e-06, + "loss": 0.2365, + "step": 19856, + "teacher_loss": 0.2297685742378235 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.4402504861354828, + "learning_rate": 9.317503043316559e-06, + "loss": 0.4914, + "step": 19857, + "teacher_loss": 0.4970824718475342 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.2952881455421448, + "learning_rate": 9.315401146324948e-06, + "loss": 0.2649, + "step": 19858, + "teacher_loss": 0.2615756690502167 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.2437678575515747, + "learning_rate": 9.31329937966415e-06, + "loss": 0.1743, + "step": 19859, + "teacher_loss": 0.16660669445991516 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.3402618169784546, + "learning_rate": 9.311197743382349e-06, + "loss": 0.1929, + "step": 19860, + "teacher_loss": 0.1765081286430359 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.4157627522945404, + "learning_rate": 9.30909623752772e-06, + "loss": 0.2332, + "step": 19861, + "teacher_loss": 0.21289071440696716 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 1.119473934173584, + "learning_rate": 9.30699486214846e-06, + "loss": 0.3305, + "step": 19862, + "teacher_loss": 0.2427879273891449 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.19123497605323792, + "learning_rate": 9.304893617292737e-06, + "loss": 0.1757, + "step": 19863, + "teacher_loss": 0.17402340471744537 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.5166798830032349, + "learning_rate": 9.302792503008725e-06, + "loss": 0.3608, + "step": 19864, + "teacher_loss": 0.3434467315673828 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.5520679950714111, + "learning_rate": 9.300691519344602e-06, + "loss": 0.23, + "step": 19865, + "teacher_loss": 0.1942073106765747 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.2275848388671875, + "learning_rate": 9.298590666348536e-06, + "loss": 0.2306, + "step": 19866, + "teacher_loss": 0.23096546530723572 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.3816664218902588, + "learning_rate": 9.29648994406869e-06, + "loss": 0.2496, + "step": 19867, + "teacher_loss": 0.2349148392677307 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.5917420387268066, + "learning_rate": 9.294389352553231e-06, + "loss": 0.2197, + "step": 19868, + "teacher_loss": 0.178411602973938 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.3821257948875427, + "learning_rate": 9.292288891850317e-06, + "loss": 0.209, + "step": 19869, + "teacher_loss": 0.1897328794002533 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.54078608751297, + "learning_rate": 9.290188562008112e-06, + "loss": 0.2701, + "step": 19870, + "teacher_loss": 0.2400021255016327 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.40267056226730347, + "learning_rate": 9.288088363074754e-06, + "loss": 0.2357, + "step": 19871, + "teacher_loss": 0.21710573136806488 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.8273252248764038, + "learning_rate": 9.285988295098414e-06, + "loss": 0.2599, + "step": 19872, + "teacher_loss": 0.1968739777803421 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.4414958655834198, + "learning_rate": 9.283888358127227e-06, + "loss": 0.2137, + "step": 19873, + "teacher_loss": 0.1884109079837799 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.3258158564567566, + "learning_rate": 9.281788552209337e-06, + "loss": 0.2718, + "step": 19874, + "teacher_loss": 0.2658481299877167 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.46209263801574707, + "learning_rate": 9.2796888773929e-06, + "loss": 0.294, + "step": 19875, + "teacher_loss": 0.27526938915252686 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.8330816030502319, + "learning_rate": 9.277589333726044e-06, + "loss": 0.2552, + "step": 19876, + "teacher_loss": 0.19099751114845276 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.9506896734237671, + "learning_rate": 9.275489921256904e-06, + "loss": 0.2608, + "step": 19877, + "teacher_loss": 0.18415045738220215 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.5601963996887207, + "learning_rate": 9.273390640033622e-06, + "loss": 0.2398, + "step": 19878, + "teacher_loss": 0.20425289869308472 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.5329597592353821, + "learning_rate": 9.271291490104323e-06, + "loss": 0.2155, + "step": 19879, + "teacher_loss": 0.1802772879600525 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.6433287858963013, + "learning_rate": 9.26919247151713e-06, + "loss": 0.438, + "step": 19880, + "teacher_loss": 0.41513144969940186 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.545869767665863, + "learning_rate": 9.267093584320175e-06, + "loss": 0.2319, + "step": 19881, + "teacher_loss": 0.1970662623643875 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.42615216970443726, + "learning_rate": 9.264994828561577e-06, + "loss": 0.2315, + "step": 19882, + "teacher_loss": 0.20986221730709076 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.36910292506217957, + "learning_rate": 9.262896204289449e-06, + "loss": 0.2232, + "step": 19883, + "teacher_loss": 0.2069844901561737 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.9538236856460571, + "learning_rate": 9.260797711551914e-06, + "loss": 0.3383, + "step": 19884, + "teacher_loss": 0.26992595195770264 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.331133097410202, + "learning_rate": 9.258699350397083e-06, + "loss": 0.1709, + "step": 19885, + "teacher_loss": 0.15311212837696075 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.36773788928985596, + "learning_rate": 9.256601120873062e-06, + "loss": 0.1993, + "step": 19886, + "teacher_loss": 0.18053176999092102 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.43046027421951294, + "learning_rate": 9.254503023027952e-06, + "loss": 0.2001, + "step": 19887, + "teacher_loss": 0.17452047765254974 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.7361464500427246, + "learning_rate": 9.252405056909865e-06, + "loss": 0.4444, + "step": 19888, + "teacher_loss": 0.4119475781917572 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.3213765621185303, + "learning_rate": 9.250307222566898e-06, + "loss": 0.2163, + "step": 19889, + "teacher_loss": 0.20458675920963287 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.51975017786026, + "learning_rate": 9.248209520047145e-06, + "loss": 0.1932, + "step": 19890, + "teacher_loss": 0.15693068504333496 + }, + { + "compression_loss": 0.0, + "epoch": 3.59, + "label_loss": 0.44473791122436523, + "learning_rate": 9.246111949398708e-06, + "loss": 0.2854, + "step": 19891, + "teacher_loss": 0.26768457889556885 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.3298566937446594, + "learning_rate": 9.24401451066967e-06, + "loss": 0.2489, + "step": 19892, + "teacher_loss": 0.2398521602153778 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.3874850273132324, + "learning_rate": 9.24191720390812e-06, + "loss": 0.1845, + "step": 19893, + "teacher_loss": 0.16198942065238953 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 1.0310026407241821, + "learning_rate": 9.239820029162149e-06, + "loss": 0.3011, + "step": 19894, + "teacher_loss": 0.22003161907196045 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.47316843271255493, + "learning_rate": 9.237722986479836e-06, + "loss": 0.1993, + "step": 19895, + "teacher_loss": 0.168876051902771 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.5754532217979431, + "learning_rate": 9.235626075909251e-06, + "loss": 0.2359, + "step": 19896, + "teacher_loss": 0.19813281297683716 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.5692824721336365, + "learning_rate": 9.233529297498483e-06, + "loss": 0.2061, + "step": 19897, + "teacher_loss": 0.16571253538131714 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.6581144332885742, + "learning_rate": 9.231432651295602e-06, + "loss": 0.4084, + "step": 19898, + "teacher_loss": 0.3806276321411133 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.4792437255382538, + "learning_rate": 9.22933613734867e-06, + "loss": 0.2515, + "step": 19899, + "teacher_loss": 0.22615377604961395 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.3260572850704193, + "learning_rate": 9.227239755705762e-06, + "loss": 0.2347, + "step": 19900, + "teacher_loss": 0.2245473563671112 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.1858081817626953, + "learning_rate": 9.225143506414938e-06, + "loss": 0.1903, + "step": 19901, + "teacher_loss": 0.19077607989311218 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.31750157475471497, + "learning_rate": 9.223047389524259e-06, + "loss": 0.4107, + "step": 19902, + "teacher_loss": 0.42110931873321533 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.6247704029083252, + "learning_rate": 9.220951405081782e-06, + "loss": 0.228, + "step": 19903, + "teacher_loss": 0.18392664194107056 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.5406287908554077, + "learning_rate": 9.218855553135567e-06, + "loss": 0.3534, + "step": 19904, + "teacher_loss": 0.3326292335987091 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.519968569278717, + "learning_rate": 9.216759833733657e-06, + "loss": 0.2213, + "step": 19905, + "teacher_loss": 0.18807634711265564 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.23802652955055237, + "learning_rate": 9.214664246924105e-06, + "loss": 0.1857, + "step": 19906, + "teacher_loss": 0.17983950674533844 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.4944468140602112, + "learning_rate": 9.21256879275496e-06, + "loss": 0.2778, + "step": 19907, + "teacher_loss": 0.2537689208984375 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.46359485387802124, + "learning_rate": 9.210473471274262e-06, + "loss": 0.2492, + "step": 19908, + "teacher_loss": 0.22541838884353638 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.3357314467430115, + "learning_rate": 9.208378282530041e-06, + "loss": 0.218, + "step": 19909, + "teacher_loss": 0.20494595170021057 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.7142949104309082, + "learning_rate": 9.206283226570352e-06, + "loss": 0.272, + "step": 19910, + "teacher_loss": 0.2228180319070816 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.43322238326072693, + "learning_rate": 9.204188303443214e-06, + "loss": 0.2157, + "step": 19911, + "teacher_loss": 0.19153130054473877 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.7269574403762817, + "learning_rate": 9.202093513196658e-06, + "loss": 0.2401, + "step": 19912, + "teacher_loss": 0.18598949909210205 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.16784697771072388, + "learning_rate": 9.199998855878719e-06, + "loss": 0.1972, + "step": 19913, + "teacher_loss": 0.2004975974559784 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.4637722969055176, + "learning_rate": 9.197904331537416e-06, + "loss": 0.202, + "step": 19914, + "teacher_loss": 0.17293208837509155 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.21799816191196442, + "learning_rate": 9.195809940220768e-06, + "loss": 0.1607, + "step": 19915, + "teacher_loss": 0.15434104204177856 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.24566230177879333, + "learning_rate": 9.193715681976801e-06, + "loss": 0.2661, + "step": 19916, + "teacher_loss": 0.2684234082698822 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.5468559265136719, + "learning_rate": 9.191621556853523e-06, + "loss": 0.2143, + "step": 19917, + "teacher_loss": 0.17740073800086975 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.12039814889431, + "learning_rate": 9.189527564898947e-06, + "loss": 0.1622, + "step": 19918, + "teacher_loss": 0.1668863594532013 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.36444589495658875, + "learning_rate": 9.187433706161087e-06, + "loss": 0.1667, + "step": 19919, + "teacher_loss": 0.14473843574523926 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.20830503106117249, + "learning_rate": 9.185339980687943e-06, + "loss": 0.1788, + "step": 19920, + "teacher_loss": 0.17554441094398499 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.6041305065155029, + "learning_rate": 9.183246388527516e-06, + "loss": 0.2897, + "step": 19921, + "teacher_loss": 0.25479745864868164 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.6556446552276611, + "learning_rate": 9.181152929727813e-06, + "loss": 0.3241, + "step": 19922, + "teacher_loss": 0.28728777170181274 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.2636571526527405, + "learning_rate": 9.179059604336833e-06, + "loss": 0.1364, + "step": 19923, + "teacher_loss": 0.12225516140460968 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.2800932228565216, + "learning_rate": 9.176966412402553e-06, + "loss": 0.2066, + "step": 19924, + "teacher_loss": 0.1984708607196808 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.28558629751205444, + "learning_rate": 9.174873353972987e-06, + "loss": 0.2239, + "step": 19925, + "teacher_loss": 0.21703684329986572 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.35899457335472107, + "learning_rate": 9.172780429096103e-06, + "loss": 0.1899, + "step": 19926, + "teacher_loss": 0.17116200923919678 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.2219620943069458, + "learning_rate": 9.170687637819897e-06, + "loss": 0.2229, + "step": 19927, + "teacher_loss": 0.22297261655330658 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.3342660963535309, + "learning_rate": 9.168594980192341e-06, + "loss": 0.2703, + "step": 19928, + "teacher_loss": 0.263232946395874 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.4044509828090668, + "learning_rate": 9.16650245626142e-06, + "loss": 0.1765, + "step": 19929, + "teacher_loss": 0.15112614631652832 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.3868674635887146, + "learning_rate": 9.164410066075111e-06, + "loss": 0.1629, + "step": 19930, + "teacher_loss": 0.13806472718715668 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.3807605803012848, + "learning_rate": 9.162317809681378e-06, + "loss": 0.2189, + "step": 19931, + "teacher_loss": 0.20092439651489258 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.5362415909767151, + "learning_rate": 9.1602256871282e-06, + "loss": 0.1804, + "step": 19932, + "teacher_loss": 0.14086347818374634 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.27870625257492065, + "learning_rate": 9.158133698463541e-06, + "loss": 0.2415, + "step": 19933, + "teacher_loss": 0.23740097880363464 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.34084075689315796, + "learning_rate": 9.156041843735352e-06, + "loss": 0.1836, + "step": 19934, + "teacher_loss": 0.16613787412643433 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.29510897397994995, + "learning_rate": 9.15395012299161e-06, + "loss": 0.2091, + "step": 19935, + "teacher_loss": 0.19958528876304626 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.4561639428138733, + "learning_rate": 9.151858536280265e-06, + "loss": 0.2342, + "step": 19936, + "teacher_loss": 0.20948924124240875 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.9261264204978943, + "learning_rate": 9.149767083649265e-06, + "loss": 0.4245, + "step": 19937, + "teacher_loss": 0.36876028776168823 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.6204567551612854, + "learning_rate": 9.14767576514657e-06, + "loss": 0.2135, + "step": 19938, + "teacher_loss": 0.16823957860469818 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.31686142086982727, + "learning_rate": 9.145584580820122e-06, + "loss": 0.1877, + "step": 19939, + "teacher_loss": 0.1733289659023285 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.5108184218406677, + "learning_rate": 9.143493530717865e-06, + "loss": 0.2267, + "step": 19940, + "teacher_loss": 0.19515544176101685 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.27669328451156616, + "learning_rate": 9.141402614887745e-06, + "loss": 0.1674, + "step": 19941, + "teacher_loss": 0.15526896715164185 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.465438574552536, + "learning_rate": 9.1393118333777e-06, + "loss": 0.202, + "step": 19942, + "teacher_loss": 0.17277663946151733 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.5035274028778076, + "learning_rate": 9.137221186235657e-06, + "loss": 0.2347, + "step": 19943, + "teacher_loss": 0.20480364561080933 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.6934943199157715, + "learning_rate": 9.13513067350956e-06, + "loss": 0.3728, + "step": 19944, + "teacher_loss": 0.33720535039901733 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.6449599862098694, + "learning_rate": 9.133040295247335e-06, + "loss": 0.2066, + "step": 19945, + "teacher_loss": 0.15785589814186096 + }, + { + "compression_loss": 0.0, + "epoch": 3.6, + "label_loss": 0.4059029221534729, + "learning_rate": 9.130950051496897e-06, + "loss": 0.2128, + "step": 19946, + "teacher_loss": 0.19134293496608734 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.547698974609375, + "learning_rate": 9.128859942306184e-06, + "loss": 0.2111, + "step": 19947, + "teacher_loss": 0.17367462813854218 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.904839038848877, + "learning_rate": 9.126769967723115e-06, + "loss": 0.2407, + "step": 19948, + "teacher_loss": 0.16695371270179749 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.4810352921485901, + "learning_rate": 9.124680127795596e-06, + "loss": 0.2789, + "step": 19949, + "teacher_loss": 0.2564719319343567 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.18367794156074524, + "learning_rate": 9.122590422571545e-06, + "loss": 0.2208, + "step": 19950, + "teacher_loss": 0.22494328022003174 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.8472585082054138, + "learning_rate": 9.120500852098877e-06, + "loss": 0.3032, + "step": 19951, + "teacher_loss": 0.24278613924980164 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.5415215492248535, + "learning_rate": 9.118411416425496e-06, + "loss": 0.2214, + "step": 19952, + "teacher_loss": 0.18588018417358398 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.3888516426086426, + "learning_rate": 9.116322115599304e-06, + "loss": 0.1896, + "step": 19953, + "teacher_loss": 0.16741704940795898 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.22994567453861237, + "learning_rate": 9.114232949668211e-06, + "loss": 0.1882, + "step": 19954, + "teacher_loss": 0.1835949867963791 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.39963263273239136, + "learning_rate": 9.112143918680108e-06, + "loss": 0.2333, + "step": 19955, + "teacher_loss": 0.21482780575752258 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.3057858347892761, + "learning_rate": 9.11005502268289e-06, + "loss": 0.1839, + "step": 19956, + "teacher_loss": 0.17035634815692902 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.7031893134117126, + "learning_rate": 9.107966261724453e-06, + "loss": 0.2282, + "step": 19957, + "teacher_loss": 0.17537254095077515 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.6388310194015503, + "learning_rate": 9.10587763585269e-06, + "loss": 0.3069, + "step": 19958, + "teacher_loss": 0.27001726627349854 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.5127741098403931, + "learning_rate": 9.10378914511547e-06, + "loss": 0.2255, + "step": 19959, + "teacher_loss": 0.19358283281326294 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.5264831781387329, + "learning_rate": 9.101700789560698e-06, + "loss": 0.1808, + "step": 19960, + "teacher_loss": 0.14238545298576355 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 1.0926851034164429, + "learning_rate": 9.099612569236239e-06, + "loss": 0.3462, + "step": 19961, + "teacher_loss": 0.2632180154323578 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.5803426504135132, + "learning_rate": 9.09752448418997e-06, + "loss": 0.2774, + "step": 19962, + "teacher_loss": 0.2437680959701538 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.44937098026275635, + "learning_rate": 9.09543653446977e-06, + "loss": 0.234, + "step": 19963, + "teacher_loss": 0.2100919485092163 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.34498661756515503, + "learning_rate": 9.09334872012351e-06, + "loss": 0.383, + "step": 19964, + "teacher_loss": 0.3872228264808655 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.30845555663108826, + "learning_rate": 9.091261041199051e-06, + "loss": 0.1643, + "step": 19965, + "teacher_loss": 0.14832833409309387 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.8969360589981079, + "learning_rate": 9.089173497744263e-06, + "loss": 0.284, + "step": 19966, + "teacher_loss": 0.2159484326839447 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.6321310997009277, + "learning_rate": 9.087086089807005e-06, + "loss": 0.218, + "step": 19967, + "teacher_loss": 0.171972393989563 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.6322377324104309, + "learning_rate": 9.084998817435139e-06, + "loss": 0.239, + "step": 19968, + "teacher_loss": 0.19525766372680664 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.47490963339805603, + "learning_rate": 9.082911680676507e-06, + "loss": 0.2929, + "step": 19969, + "teacher_loss": 0.2727214992046356 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.692403256893158, + "learning_rate": 9.08082467957898e-06, + "loss": 0.325, + "step": 19970, + "teacher_loss": 0.2841499149799347 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.6878918409347534, + "learning_rate": 9.078737814190393e-06, + "loss": 0.4139, + "step": 19971, + "teacher_loss": 0.38343846797943115 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.8984094262123108, + "learning_rate": 9.076651084558588e-06, + "loss": 0.2838, + "step": 19972, + "teacher_loss": 0.215561181306839 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.16383318603038788, + "learning_rate": 9.074564490731424e-06, + "loss": 0.2152, + "step": 19973, + "teacher_loss": 0.22089210152626038 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.3293156623840332, + "learning_rate": 9.072478032756726e-06, + "loss": 0.205, + "step": 19974, + "teacher_loss": 0.19120121002197266 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.2958255410194397, + "learning_rate": 9.070391710682334e-06, + "loss": 0.1854, + "step": 19975, + "teacher_loss": 0.17310123145580292 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.3125835657119751, + "learning_rate": 9.068305524556086e-06, + "loss": 0.288, + "step": 19976, + "teacher_loss": 0.2853020131587982 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.7090179920196533, + "learning_rate": 9.066219474425808e-06, + "loss": 0.395, + "step": 19977, + "teacher_loss": 0.36016225814819336 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.8693034648895264, + "learning_rate": 9.064133560339322e-06, + "loss": 0.3069, + "step": 19978, + "teacher_loss": 0.2444513440132141 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.6638647317886353, + "learning_rate": 9.062047782344461e-06, + "loss": 0.3217, + "step": 19979, + "teacher_loss": 0.28368502855300903 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.6860822439193726, + "learning_rate": 9.059962140489041e-06, + "loss": 0.3384, + "step": 19980, + "teacher_loss": 0.2998095154762268 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.3258597254753113, + "learning_rate": 9.057876634820878e-06, + "loss": 0.2668, + "step": 19981, + "teacher_loss": 0.2601909339427948 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.37443482875823975, + "learning_rate": 9.05579126538779e-06, + "loss": 0.2316, + "step": 19982, + "teacher_loss": 0.21574074029922485 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.34044164419174194, + "learning_rate": 9.053706032237592e-06, + "loss": 0.3333, + "step": 19983, + "teacher_loss": 0.3325221836566925 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.26624196767807007, + "learning_rate": 9.051620935418075e-06, + "loss": 0.1917, + "step": 19984, + "teacher_loss": 0.18342649936676025 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.2921028137207031, + "learning_rate": 9.049535974977067e-06, + "loss": 0.1777, + "step": 19985, + "teacher_loss": 0.1649664044380188 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.2905924320220947, + "learning_rate": 9.047451150962354e-06, + "loss": 0.2437, + "step": 19986, + "teacher_loss": 0.2385289967060089 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.18643257021903992, + "learning_rate": 9.045366463421736e-06, + "loss": 0.1808, + "step": 19987, + "teacher_loss": 0.1801559329032898 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.9008071422576904, + "learning_rate": 9.043281912403016e-06, + "loss": 0.3188, + "step": 19988, + "teacher_loss": 0.2541596293449402 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.5576692223548889, + "learning_rate": 9.041197497953984e-06, + "loss": 0.2389, + "step": 19989, + "teacher_loss": 0.20346516370773315 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.3989705443382263, + "learning_rate": 9.039113220122426e-06, + "loss": 0.2483, + "step": 19990, + "teacher_loss": 0.23151624202728271 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.42784440517425537, + "learning_rate": 9.037029078956126e-06, + "loss": 0.2173, + "step": 19991, + "teacher_loss": 0.19386866688728333 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.4746456742286682, + "learning_rate": 9.034945074502879e-06, + "loss": 0.4454, + "step": 19992, + "teacher_loss": 0.44210803508758545 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.31803518533706665, + "learning_rate": 9.032861206810456e-06, + "loss": 0.2132, + "step": 19993, + "teacher_loss": 0.20155873894691467 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.1412222981452942, + "learning_rate": 9.03077747592663e-06, + "loss": 0.1887, + "step": 19994, + "teacher_loss": 0.19392287731170654 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.26981955766677856, + "learning_rate": 9.028693881899185e-06, + "loss": 0.1784, + "step": 19995, + "teacher_loss": 0.16822095215320587 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.43685564398765564, + "learning_rate": 9.026610424775885e-06, + "loss": 0.2381, + "step": 19996, + "teacher_loss": 0.21597439050674438 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.38053351640701294, + "learning_rate": 9.024527104604493e-06, + "loss": 0.3429, + "step": 19997, + "teacher_loss": 0.338720440864563 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.35880979895591736, + "learning_rate": 9.022443921432785e-06, + "loss": 0.1782, + "step": 19998, + "teacher_loss": 0.15809421241283417 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.4427030086517334, + "learning_rate": 9.020360875308518e-06, + "loss": 0.2476, + "step": 19999, + "teacher_loss": 0.2259388417005539 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.22023199498653412, + "learning_rate": 9.018277966279443e-06, + "loss": 0.2655, + "step": 20000, + "teacher_loss": 0.27047526836395264 + }, + { + "epoch": 3.61, + "eval_exact_match": 80.43519394512772, + "eval_f1": 87.77667688597502, + "step": 20000 + }, + { + "compression_loss": 0.0, + "epoch": 3.61, + "label_loss": 0.4198623299598694, + "learning_rate": 9.016195194393324e-06, + "loss": 0.2399, + "step": 20001, + "teacher_loss": 0.21993079781532288 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.5562000274658203, + "learning_rate": 9.014112559697908e-06, + "loss": 0.2588, + "step": 20002, + "teacher_loss": 0.22572064399719238 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.32476580142974854, + "learning_rate": 9.01203006224094e-06, + "loss": 0.2309, + "step": 20003, + "teacher_loss": 0.22042503952980042 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.34965893626213074, + "learning_rate": 9.009947702070175e-06, + "loss": 0.215, + "step": 20004, + "teacher_loss": 0.2000509351491928 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.1953909695148468, + "learning_rate": 9.00786547923335e-06, + "loss": 0.2654, + "step": 20005, + "teacher_loss": 0.2731941044330597 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.2841143310070038, + "learning_rate": 9.0057833937782e-06, + "loss": 0.2276, + "step": 20006, + "teacher_loss": 0.22130073606967926 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.2699645757675171, + "learning_rate": 9.003701445752468e-06, + "loss": 0.1719, + "step": 20007, + "teacher_loss": 0.1610306203365326 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.52392578125, + "learning_rate": 9.001619635203889e-06, + "loss": 0.3048, + "step": 20008, + "teacher_loss": 0.2804965674877167 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.6863110065460205, + "learning_rate": 8.999537962180177e-06, + "loss": 0.4419, + "step": 20009, + "teacher_loss": 0.4147794842720032 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.6442421674728394, + "learning_rate": 8.99745642672908e-06, + "loss": 0.2044, + "step": 20010, + "teacher_loss": 0.15548613667488098 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.45127272605895996, + "learning_rate": 8.995375028898305e-06, + "loss": 0.2959, + "step": 20011, + "teacher_loss": 0.278641939163208 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.41365790367126465, + "learning_rate": 8.99329376873558e-06, + "loss": 0.1769, + "step": 20012, + "teacher_loss": 0.15058261156082153 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.3105650246143341, + "learning_rate": 8.991212646288615e-06, + "loss": 0.1974, + "step": 20013, + "teacher_loss": 0.18481430411338806 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.42537838220596313, + "learning_rate": 8.989131661605133e-06, + "loss": 0.2173, + "step": 20014, + "teacher_loss": 0.1942179799079895 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.7166765332221985, + "learning_rate": 8.987050814732839e-06, + "loss": 0.2258, + "step": 20015, + "teacher_loss": 0.17123952507972717 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.6444005966186523, + "learning_rate": 8.98497010571944e-06, + "loss": 0.2643, + "step": 20016, + "teacher_loss": 0.22210508584976196 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.567467451095581, + "learning_rate": 8.982889534612646e-06, + "loss": 0.1974, + "step": 20017, + "teacher_loss": 0.15623821318149567 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.28660139441490173, + "learning_rate": 8.980809101460158e-06, + "loss": 0.226, + "step": 20018, + "teacher_loss": 0.2192680835723877 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.8865457773208618, + "learning_rate": 8.97872880630966e-06, + "loss": 0.2889, + "step": 20019, + "teacher_loss": 0.2225276231765747 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.3245702087879181, + "learning_rate": 8.976648649208866e-06, + "loss": 0.2031, + "step": 20020, + "teacher_loss": 0.18959203362464905 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.7300923466682434, + "learning_rate": 8.974568630205462e-06, + "loss": 0.2439, + "step": 20021, + "teacher_loss": 0.1898818016052246 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.33470776677131653, + "learning_rate": 8.972488749347126e-06, + "loss": 0.2251, + "step": 20022, + "teacher_loss": 0.2129203975200653 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.19618995487689972, + "learning_rate": 8.970409006681558e-06, + "loss": 0.2345, + "step": 20023, + "teacher_loss": 0.23874971270561218 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.39174938201904297, + "learning_rate": 8.968329402256432e-06, + "loss": 0.2273, + "step": 20024, + "teacher_loss": 0.20899301767349243 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.4898705780506134, + "learning_rate": 8.966249936119425e-06, + "loss": 0.2364, + "step": 20025, + "teacher_loss": 0.2082604467868805 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.49955886602401733, + "learning_rate": 8.964170608318222e-06, + "loss": 0.2584, + "step": 20026, + "teacher_loss": 0.23161479830741882 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.26668012142181396, + "learning_rate": 8.962091418900488e-06, + "loss": 0.3176, + "step": 20027, + "teacher_loss": 0.32322514057159424 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.6411541700363159, + "learning_rate": 8.960012367913894e-06, + "loss": 0.2583, + "step": 20028, + "teacher_loss": 0.21577411890029907 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.6308726668357849, + "learning_rate": 8.957933455406112e-06, + "loss": 0.2711, + "step": 20029, + "teacher_loss": 0.23112300038337708 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.28858649730682373, + "learning_rate": 8.955854681424797e-06, + "loss": 0.2131, + "step": 20030, + "teacher_loss": 0.20475052297115326 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.5992348194122314, + "learning_rate": 8.953776046017618e-06, + "loss": 0.2238, + "step": 20031, + "teacher_loss": 0.18214033544063568 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.4789152443408966, + "learning_rate": 8.951697549232217e-06, + "loss": 0.2628, + "step": 20032, + "teacher_loss": 0.23875531554222107 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.6461098194122314, + "learning_rate": 8.949619191116268e-06, + "loss": 0.2291, + "step": 20033, + "teacher_loss": 0.18272440135478973 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.3495197594165802, + "learning_rate": 8.947540971717406e-06, + "loss": 0.1799, + "step": 20034, + "teacher_loss": 0.1610354483127594 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.3763784170150757, + "learning_rate": 8.94546289108328e-06, + "loss": 0.2, + "step": 20035, + "teacher_loss": 0.18040016293525696 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.38527852296829224, + "learning_rate": 8.943384949261544e-06, + "loss": 0.1897, + "step": 20036, + "teacher_loss": 0.16793614625930786 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.23364733159542084, + "learning_rate": 8.94130714629983e-06, + "loss": 0.2265, + "step": 20037, + "teacher_loss": 0.2257448434829712 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.6762915849685669, + "learning_rate": 8.939229482245774e-06, + "loss": 0.2612, + "step": 20038, + "teacher_loss": 0.215114563703537 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.48530444502830505, + "learning_rate": 8.937151957147021e-06, + "loss": 0.2778, + "step": 20039, + "teacher_loss": 0.2547089457511902 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.2480117827653885, + "learning_rate": 8.935074571051194e-06, + "loss": 0.1717, + "step": 20040, + "teacher_loss": 0.1632324457168579 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.6173408031463623, + "learning_rate": 8.93299732400592e-06, + "loss": 0.2926, + "step": 20041, + "teacher_loss": 0.2565540671348572 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.15449869632720947, + "learning_rate": 8.930920216058832e-06, + "loss": 0.1756, + "step": 20042, + "teacher_loss": 0.17798486351966858 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 1.4325318336486816, + "learning_rate": 8.92884324725755e-06, + "loss": 0.3504, + "step": 20043, + "teacher_loss": 0.2301802784204483 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.5709894895553589, + "learning_rate": 8.92676641764968e-06, + "loss": 0.3331, + "step": 20044, + "teacher_loss": 0.3066791892051697 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.9169344305992126, + "learning_rate": 8.924689727282856e-06, + "loss": 0.3521, + "step": 20045, + "teacher_loss": 0.2893841564655304 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.21883299946784973, + "learning_rate": 8.922613176204685e-06, + "loss": 0.206, + "step": 20046, + "teacher_loss": 0.20453500747680664 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.557162880897522, + "learning_rate": 8.920536764462762e-06, + "loss": 0.2518, + "step": 20047, + "teacher_loss": 0.21787531673908234 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.33095499873161316, + "learning_rate": 8.918460492104712e-06, + "loss": 0.1349, + "step": 20048, + "teacher_loss": 0.11316149681806564 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.7031446695327759, + "learning_rate": 8.916384359178127e-06, + "loss": 0.2771, + "step": 20049, + "teacher_loss": 0.22977030277252197 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.19005897641181946, + "learning_rate": 8.914308365730608e-06, + "loss": 0.2343, + "step": 20050, + "teacher_loss": 0.23919777572155 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.3658381700515747, + "learning_rate": 8.912232511809753e-06, + "loss": 0.2543, + "step": 20051, + "teacher_loss": 0.24188114702701569 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.37547972798347473, + "learning_rate": 8.910156797463156e-06, + "loss": 0.2404, + "step": 20052, + "teacher_loss": 0.22537273168563843 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.7734442353248596, + "learning_rate": 8.908081222738403e-06, + "loss": 0.2849, + "step": 20053, + "teacher_loss": 0.23065117001533508 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.31264474987983704, + "learning_rate": 8.906005787683083e-06, + "loss": 0.1596, + "step": 20054, + "teacher_loss": 0.1426108181476593 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.3375699818134308, + "learning_rate": 8.90393049234478e-06, + "loss": 0.1823, + "step": 20055, + "teacher_loss": 0.16508419811725616 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 1.3302650451660156, + "learning_rate": 8.90185533677108e-06, + "loss": 0.7684, + "step": 20056, + "teacher_loss": 0.7059221863746643 + }, + { + "compression_loss": 0.0, + "epoch": 3.62, + "label_loss": 0.7007139921188354, + "learning_rate": 8.899780321009544e-06, + "loss": 0.2647, + "step": 20057, + "teacher_loss": 0.2162424921989441 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.9428095817565918, + "learning_rate": 8.897705445107762e-06, + "loss": 0.2394, + "step": 20058, + "teacher_loss": 0.16121414303779602 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.19587141275405884, + "learning_rate": 8.895630709113299e-06, + "loss": 0.2031, + "step": 20059, + "teacher_loss": 0.20394980907440186 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.4799688160419464, + "learning_rate": 8.893556113073718e-06, + "loss": 0.243, + "step": 20060, + "teacher_loss": 0.2167043387889862 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.08864134550094604, + "learning_rate": 8.891481657036589e-06, + "loss": 0.1596, + "step": 20061, + "teacher_loss": 0.16744858026504517 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.5029218792915344, + "learning_rate": 8.889407341049472e-06, + "loss": 0.2393, + "step": 20062, + "teacher_loss": 0.21003496646881104 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.14628297090530396, + "learning_rate": 8.887333165159921e-06, + "loss": 0.1691, + "step": 20063, + "teacher_loss": 0.17162545025348663 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.5763274431228638, + "learning_rate": 8.885259129415497e-06, + "loss": 0.1934, + "step": 20064, + "teacher_loss": 0.15080051124095917 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.46548789739608765, + "learning_rate": 8.88318523386375e-06, + "loss": 0.2315, + "step": 20065, + "teacher_loss": 0.20555195212364197 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.8760215044021606, + "learning_rate": 8.881111478552221e-06, + "loss": 0.2636, + "step": 20066, + "teacher_loss": 0.19559669494628906 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.5683371424674988, + "learning_rate": 8.879037863528464e-06, + "loss": 0.2172, + "step": 20067, + "teacher_loss": 0.17822539806365967 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.2963174283504486, + "learning_rate": 8.876964388840019e-06, + "loss": 0.2069, + "step": 20068, + "teacher_loss": 0.19691653549671173 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.7573115825653076, + "learning_rate": 8.874891054534416e-06, + "loss": 0.318, + "step": 20069, + "teacher_loss": 0.26917916536331177 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.20222747325897217, + "learning_rate": 8.872817860659203e-06, + "loss": 0.185, + "step": 20070, + "teacher_loss": 0.18308418989181519 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.23196372389793396, + "learning_rate": 8.870744807261908e-06, + "loss": 0.2156, + "step": 20071, + "teacher_loss": 0.2137642353773117 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.36386919021606445, + "learning_rate": 8.868671894390056e-06, + "loss": 0.2418, + "step": 20072, + "teacher_loss": 0.22822481393814087 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.46272265911102295, + "learning_rate": 8.86659912209117e-06, + "loss": 0.2253, + "step": 20073, + "teacher_loss": 0.19888511300086975 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.5408502817153931, + "learning_rate": 8.86452649041278e-06, + "loss": 0.1977, + "step": 20074, + "teacher_loss": 0.15957465767860413 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.41661757230758667, + "learning_rate": 8.862453999402406e-06, + "loss": 0.2604, + "step": 20075, + "teacher_loss": 0.24306653439998627 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.7930693626403809, + "learning_rate": 8.860381649107553e-06, + "loss": 0.313, + "step": 20076, + "teacher_loss": 0.2596908509731293 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.2829344868659973, + "learning_rate": 8.858309439575747e-06, + "loss": 0.1325, + "step": 20077, + "teacher_loss": 0.11576279997825623 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 1.1637897491455078, + "learning_rate": 8.856237370854493e-06, + "loss": 0.5858, + "step": 20078, + "teacher_loss": 0.5215364098548889 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.4017188549041748, + "learning_rate": 8.85416544299129e-06, + "loss": 0.252, + "step": 20079, + "teacher_loss": 0.2353803515434265 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.29159560799598694, + "learning_rate": 8.852093656033654e-06, + "loss": 0.1824, + "step": 20080, + "teacher_loss": 0.17028193175792694 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.9635019898414612, + "learning_rate": 8.850022010029077e-06, + "loss": 0.307, + "step": 20081, + "teacher_loss": 0.23407316207885742 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.44538551568984985, + "learning_rate": 8.84795050502505e-06, + "loss": 0.1841, + "step": 20082, + "teacher_loss": 0.15502850711345673 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.21815159916877747, + "learning_rate": 8.845879141069082e-06, + "loss": 0.1667, + "step": 20083, + "teacher_loss": 0.16098147630691528 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.315278559923172, + "learning_rate": 8.84380791820865e-06, + "loss": 0.1872, + "step": 20084, + "teacher_loss": 0.17291617393493652 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.5410944819450378, + "learning_rate": 8.841736836491245e-06, + "loss": 0.287, + "step": 20085, + "teacher_loss": 0.2588125169277191 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.6641720533370972, + "learning_rate": 8.839665895964352e-06, + "loss": 0.2421, + "step": 20086, + "teacher_loss": 0.1952308714389801 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.3963237404823303, + "learning_rate": 8.837595096675451e-06, + "loss": 0.2196, + "step": 20087, + "teacher_loss": 0.2000068575143814 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.5264793634414673, + "learning_rate": 8.835524438672013e-06, + "loss": 0.2557, + "step": 20088, + "teacher_loss": 0.2256353199481964 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.2969478964805603, + "learning_rate": 8.833453922001524e-06, + "loss": 0.2212, + "step": 20089, + "teacher_loss": 0.21273405849933624 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.648070216178894, + "learning_rate": 8.831383546711448e-06, + "loss": 0.1893, + "step": 20090, + "teacher_loss": 0.1383148431777954 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.4691910743713379, + "learning_rate": 8.829313312849248e-06, + "loss": 0.224, + "step": 20091, + "teacher_loss": 0.1967625916004181 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.3811272978782654, + "learning_rate": 8.827243220462398e-06, + "loss": 0.2162, + "step": 20092, + "teacher_loss": 0.19787511229515076 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.5865489840507507, + "learning_rate": 8.825173269598354e-06, + "loss": 0.2552, + "step": 20093, + "teacher_loss": 0.21843412518501282 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.5812729001045227, + "learning_rate": 8.823103460304573e-06, + "loss": 0.263, + "step": 20094, + "teacher_loss": 0.2276797890663147 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.2159494161605835, + "learning_rate": 8.821033792628503e-06, + "loss": 0.1398, + "step": 20095, + "teacher_loss": 0.13137969374656677 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.6607014536857605, + "learning_rate": 8.818964266617614e-06, + "loss": 0.1963, + "step": 20096, + "teacher_loss": 0.14465680718421936 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.4555264711380005, + "learning_rate": 8.816894882319339e-06, + "loss": 0.2772, + "step": 20097, + "teacher_loss": 0.2573592960834503 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.8002545833587646, + "learning_rate": 8.814825639781123e-06, + "loss": 0.3438, + "step": 20098, + "teacher_loss": 0.29311633110046387 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.21377700567245483, + "learning_rate": 8.812756539050413e-06, + "loss": 0.1654, + "step": 20099, + "teacher_loss": 0.16003045439720154 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.21958070993423462, + "learning_rate": 8.810687580174646e-06, + "loss": 0.1963, + "step": 20100, + "teacher_loss": 0.1937561184167862 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.32097071409225464, + "learning_rate": 8.808618763201253e-06, + "loss": 0.2769, + "step": 20101, + "teacher_loss": 0.27201932668685913 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.35265713930130005, + "learning_rate": 8.806550088177671e-06, + "loss": 0.2835, + "step": 20102, + "teacher_loss": 0.2758619487285614 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 1.5644676685333252, + "learning_rate": 8.80448155515133e-06, + "loss": 0.3634, + "step": 20103, + "teacher_loss": 0.22998693585395813 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.6261578798294067, + "learning_rate": 8.802413164169647e-06, + "loss": 0.2595, + "step": 20104, + "teacher_loss": 0.21875405311584473 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.3301115930080414, + "learning_rate": 8.80034491528005e-06, + "loss": 0.2062, + "step": 20105, + "teacher_loss": 0.19241949915885925 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.49898579716682434, + "learning_rate": 8.798276808529961e-06, + "loss": 0.2622, + "step": 20106, + "teacher_loss": 0.23591215908527374 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.4973953366279602, + "learning_rate": 8.796208843966784e-06, + "loss": 0.263, + "step": 20107, + "teacher_loss": 0.236973375082016 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.41038110852241516, + "learning_rate": 8.794141021637944e-06, + "loss": 0.29, + "step": 20108, + "teacher_loss": 0.27661973237991333 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.3936123549938202, + "learning_rate": 8.792073341590843e-06, + "loss": 0.1599, + "step": 20109, + "teacher_loss": 0.1339282989501953 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.33661508560180664, + "learning_rate": 8.790005803872884e-06, + "loss": 0.2002, + "step": 20110, + "teacher_loss": 0.1850610375404358 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.2975943088531494, + "learning_rate": 8.787938408531478e-06, + "loss": 0.2094, + "step": 20111, + "teacher_loss": 0.1995452344417572 + }, + { + "compression_loss": 0.0, + "epoch": 3.63, + "label_loss": 0.4563060700893402, + "learning_rate": 8.785871155614019e-06, + "loss": 0.186, + "step": 20112, + "teacher_loss": 0.1560056209564209 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.3161929249763489, + "learning_rate": 8.7838040451679e-06, + "loss": 0.2158, + "step": 20113, + "teacher_loss": 0.20463277399539948 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.9171396493911743, + "learning_rate": 8.78173707724052e-06, + "loss": 0.2339, + "step": 20114, + "teacher_loss": 0.1579330861568451 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.39789557456970215, + "learning_rate": 8.779670251879265e-06, + "loss": 0.2461, + "step": 20115, + "teacher_loss": 0.22918111085891724 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.35162532329559326, + "learning_rate": 8.777603569131526e-06, + "loss": 0.1989, + "step": 20116, + "teacher_loss": 0.18192234635353088 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.37086138129234314, + "learning_rate": 8.77553702904467e-06, + "loss": 0.2186, + "step": 20117, + "teacher_loss": 0.20163457095623016 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.27766144275665283, + "learning_rate": 8.7734706316661e-06, + "loss": 0.2892, + "step": 20118, + "teacher_loss": 0.29052528738975525 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.243914395570755, + "learning_rate": 8.771404377043177e-06, + "loss": 0.2158, + "step": 20119, + "teacher_loss": 0.21273070573806763 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.46920087933540344, + "learning_rate": 8.76933826522327e-06, + "loss": 0.24, + "step": 20120, + "teacher_loss": 0.21455080807209015 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.25863173604011536, + "learning_rate": 8.767272296253766e-06, + "loss": 0.1798, + "step": 20121, + "teacher_loss": 0.171036958694458 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.7251174449920654, + "learning_rate": 8.765206470182018e-06, + "loss": 0.2916, + "step": 20122, + "teacher_loss": 0.24340078234672546 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.2674739360809326, + "learning_rate": 8.763140787055388e-06, + "loss": 0.2369, + "step": 20123, + "teacher_loss": 0.23350904881954193 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.5866222977638245, + "learning_rate": 8.761075246921246e-06, + "loss": 0.2758, + "step": 20124, + "teacher_loss": 0.24129986763000488 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.2375549077987671, + "learning_rate": 8.759009849826943e-06, + "loss": 0.1841, + "step": 20125, + "teacher_loss": 0.1781502366065979 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.671151876449585, + "learning_rate": 8.756944595819827e-06, + "loss": 0.3537, + "step": 20126, + "teacher_loss": 0.31845611333847046 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.9768250584602356, + "learning_rate": 8.754879484947261e-06, + "loss": 0.2996, + "step": 20127, + "teacher_loss": 0.22433406114578247 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.44259709119796753, + "learning_rate": 8.752814517256587e-06, + "loss": 0.2276, + "step": 20128, + "teacher_loss": 0.2037428915500641 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.3680911064147949, + "learning_rate": 8.750749692795139e-06, + "loss": 0.2112, + "step": 20129, + "teacher_loss": 0.1938011646270752 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.7616530060768127, + "learning_rate": 8.748685011610264e-06, + "loss": 0.3618, + "step": 20130, + "teacher_loss": 0.3173372745513916 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.31108957529067993, + "learning_rate": 8.746620473749307e-06, + "loss": 0.1927, + "step": 20131, + "teacher_loss": 0.17951622605323792 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.5059388875961304, + "learning_rate": 8.744556079259586e-06, + "loss": 0.2164, + "step": 20132, + "teacher_loss": 0.1841844618320465 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.13759613037109375, + "learning_rate": 8.74249182818844e-06, + "loss": 0.1911, + "step": 20133, + "teacher_loss": 0.19709071516990662 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.23851510882377625, + "learning_rate": 8.740427720583199e-06, + "loss": 0.1868, + "step": 20134, + "teacher_loss": 0.18101832270622253 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.082100510597229, + "learning_rate": 8.738363756491186e-06, + "loss": 0.13, + "step": 20135, + "teacher_loss": 0.1352669596672058 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.6354144215583801, + "learning_rate": 8.736299935959706e-06, + "loss": 0.2475, + "step": 20136, + "teacher_loss": 0.20438790321350098 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.4087774157524109, + "learning_rate": 8.734236259036101e-06, + "loss": 0.2781, + "step": 20137, + "teacher_loss": 0.26354488730430603 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.6276995539665222, + "learning_rate": 8.732172725767673e-06, + "loss": 0.27, + "step": 20138, + "teacher_loss": 0.23030540347099304 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.48228561878204346, + "learning_rate": 8.730109336201726e-06, + "loss": 0.1914, + "step": 20139, + "teacher_loss": 0.1590750515460968 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.46530789136886597, + "learning_rate": 8.728046090385572e-06, + "loss": 0.3492, + "step": 20140, + "teacher_loss": 0.3363426923751831 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.4980327785015106, + "learning_rate": 8.725982988366527e-06, + "loss": 0.3404, + "step": 20141, + "teacher_loss": 0.32283854484558105 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.7775996923446655, + "learning_rate": 8.723920030191874e-06, + "loss": 0.3359, + "step": 20142, + "teacher_loss": 0.2868744134902954 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.29764047265052795, + "learning_rate": 8.721857215908916e-06, + "loss": 0.2195, + "step": 20143, + "teacher_loss": 0.21085713803768158 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.4057222306728363, + "learning_rate": 8.719794545564957e-06, + "loss": 0.2728, + "step": 20144, + "teacher_loss": 0.2580621838569641 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.2733703851699829, + "learning_rate": 8.717732019207274e-06, + "loss": 0.214, + "step": 20145, + "teacher_loss": 0.20743875205516815 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.5477606058120728, + "learning_rate": 8.71566963688316e-06, + "loss": 0.2585, + "step": 20146, + "teacher_loss": 0.22630661725997925 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.25076591968536377, + "learning_rate": 8.713607398639909e-06, + "loss": 0.1836, + "step": 20147, + "teacher_loss": 0.1761234700679779 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.1703055500984192, + "learning_rate": 8.711545304524781e-06, + "loss": 0.1849, + "step": 20148, + "teacher_loss": 0.1864854246377945 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.2854404151439667, + "learning_rate": 8.70948335458507e-06, + "loss": 0.2203, + "step": 20149, + "teacher_loss": 0.21306897699832916 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.2666966915130615, + "learning_rate": 8.70742154886805e-06, + "loss": 0.2164, + "step": 20150, + "teacher_loss": 0.21078705787658691 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.2651047706604004, + "learning_rate": 8.705359887420982e-06, + "loss": 0.1753, + "step": 20151, + "teacher_loss": 0.16531670093536377 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.3236709237098694, + "learning_rate": 8.703298370291146e-06, + "loss": 0.2077, + "step": 20152, + "teacher_loss": 0.19479495286941528 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.5478780269622803, + "learning_rate": 8.701236997525792e-06, + "loss": 0.2968, + "step": 20153, + "teacher_loss": 0.2689024806022644 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.3117363154888153, + "learning_rate": 8.699175769172191e-06, + "loss": 0.1854, + "step": 20154, + "teacher_loss": 0.17131835222244263 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.3794316053390503, + "learning_rate": 8.697114685277603e-06, + "loss": 0.2333, + "step": 20155, + "teacher_loss": 0.21705839037895203 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.30206358432769775, + "learning_rate": 8.695053745889274e-06, + "loss": 0.1828, + "step": 20156, + "teacher_loss": 0.16952402889728546 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.3111790418624878, + "learning_rate": 8.692992951054464e-06, + "loss": 0.2182, + "step": 20157, + "teacher_loss": 0.20790278911590576 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.3668109178543091, + "learning_rate": 8.69093230082041e-06, + "loss": 0.3025, + "step": 20158, + "teacher_loss": 0.29536116123199463 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.3567535877227783, + "learning_rate": 8.688871795234365e-06, + "loss": 0.2268, + "step": 20159, + "teacher_loss": 0.21241214871406555 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.18598857522010803, + "learning_rate": 8.686811434343574e-06, + "loss": 0.1997, + "step": 20160, + "teacher_loss": 0.20123563706874847 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.5409897565841675, + "learning_rate": 8.68475121819526e-06, + "loss": 0.3076, + "step": 20161, + "teacher_loss": 0.28170478343963623 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.3420236110687256, + "learning_rate": 8.682691146836674e-06, + "loss": 0.184, + "step": 20162, + "teacher_loss": 0.1664143204689026 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.46551820635795593, + "learning_rate": 8.680631220315034e-06, + "loss": 0.1692, + "step": 20163, + "teacher_loss": 0.13622616231441498 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.3419816493988037, + "learning_rate": 8.678571438677573e-06, + "loss": 0.1857, + "step": 20164, + "teacher_loss": 0.16830158233642578 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.33312928676605225, + "learning_rate": 8.676511801971522e-06, + "loss": 0.1744, + "step": 20165, + "teacher_loss": 0.15680186450481415 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.30069297552108765, + "learning_rate": 8.674452310244091e-06, + "loss": 0.2323, + "step": 20166, + "teacher_loss": 0.22467570006847382 + }, + { + "compression_loss": 0.0, + "epoch": 3.64, + "label_loss": 0.4934171438217163, + "learning_rate": 8.672392963542504e-06, + "loss": 0.2266, + "step": 20167, + "teacher_loss": 0.19699643552303314 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.6495916843414307, + "learning_rate": 8.67033376191398e-06, + "loss": 0.2064, + "step": 20168, + "teacher_loss": 0.15714342892169952 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.34311622381210327, + "learning_rate": 8.668274705405722e-06, + "loss": 0.2386, + "step": 20169, + "teacher_loss": 0.22699680924415588 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.31838762760162354, + "learning_rate": 8.666215794064939e-06, + "loss": 0.2915, + "step": 20170, + "teacher_loss": 0.28847840428352356 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.4417462944984436, + "learning_rate": 8.664157027938846e-06, + "loss": 0.2436, + "step": 20171, + "teacher_loss": 0.2215639352798462 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.9922329187393188, + "learning_rate": 8.66209840707463e-06, + "loss": 0.3844, + "step": 20172, + "teacher_loss": 0.31684058904647827 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.26555538177490234, + "learning_rate": 8.660039931519495e-06, + "loss": 0.2155, + "step": 20173, + "teacher_loss": 0.20992310345172882 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.2855551540851593, + "learning_rate": 8.657981601320643e-06, + "loss": 0.2624, + "step": 20174, + "teacher_loss": 0.25986993312835693 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.692298412322998, + "learning_rate": 8.655923416525258e-06, + "loss": 0.2558, + "step": 20175, + "teacher_loss": 0.20730829238891602 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.7641059756278992, + "learning_rate": 8.653865377180517e-06, + "loss": 0.2749, + "step": 20176, + "teacher_loss": 0.2205168455839157 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.7931911945343018, + "learning_rate": 8.651807483333627e-06, + "loss": 0.3376, + "step": 20177, + "teacher_loss": 0.28699490427970886 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.32132911682128906, + "learning_rate": 8.649749735031758e-06, + "loss": 0.1621, + "step": 20178, + "teacher_loss": 0.14437633752822876 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.4211026430130005, + "learning_rate": 8.647692132322084e-06, + "loss": 0.2097, + "step": 20179, + "teacher_loss": 0.18623338639736176 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.7544137239456177, + "learning_rate": 8.645634675251784e-06, + "loss": 0.3068, + "step": 20180, + "teacher_loss": 0.2570740282535553 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.37173134088516235, + "learning_rate": 8.643577363868036e-06, + "loss": 0.2214, + "step": 20181, + "teacher_loss": 0.20471572875976562 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.6343812346458435, + "learning_rate": 8.641520198217995e-06, + "loss": 0.2393, + "step": 20182, + "teacher_loss": 0.19541388750076294 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.4001957178115845, + "learning_rate": 8.639463178348831e-06, + "loss": 0.2453, + "step": 20183, + "teacher_loss": 0.2280999720096588 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.5001657009124756, + "learning_rate": 8.637406304307714e-06, + "loss": 0.396, + "step": 20184, + "teacher_loss": 0.3844684362411499 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.5640811920166016, + "learning_rate": 8.635349576141795e-06, + "loss": 0.2438, + "step": 20185, + "teacher_loss": 0.2082669585943222 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.6251089572906494, + "learning_rate": 8.633292993898214e-06, + "loss": 0.2573, + "step": 20186, + "teacher_loss": 0.21646592020988464 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.3213304579257965, + "learning_rate": 8.631236557624149e-06, + "loss": 0.2364, + "step": 20187, + "teacher_loss": 0.22691258788108826 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.640985369682312, + "learning_rate": 8.629180267366736e-06, + "loss": 0.2176, + "step": 20188, + "teacher_loss": 0.170509934425354 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.3191848695278168, + "learning_rate": 8.627124123173112e-06, + "loss": 0.2336, + "step": 20189, + "teacher_loss": 0.22413255274295807 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.28675830364227295, + "learning_rate": 8.625068125090424e-06, + "loss": 0.2099, + "step": 20190, + "teacher_loss": 0.20134183764457703 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.6062551736831665, + "learning_rate": 8.62301227316582e-06, + "loss": 0.1923, + "step": 20191, + "teacher_loss": 0.14634083211421967 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.3339284658432007, + "learning_rate": 8.620956567446417e-06, + "loss": 0.2681, + "step": 20192, + "teacher_loss": 0.2608281970024109 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.4099222719669342, + "learning_rate": 8.618901007979353e-06, + "loss": 0.2312, + "step": 20193, + "teacher_loss": 0.21135810017585754 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.22398120164871216, + "learning_rate": 8.616845594811765e-06, + "loss": 0.1565, + "step": 20194, + "teacher_loss": 0.14898596704006195 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.4728623926639557, + "learning_rate": 8.614790327990766e-06, + "loss": 0.2602, + "step": 20195, + "teacher_loss": 0.23652443289756775 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.3112582862377167, + "learning_rate": 8.612735207563478e-06, + "loss": 0.2918, + "step": 20196, + "teacher_loss": 0.2895861268043518 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.29711753129959106, + "learning_rate": 8.610680233577029e-06, + "loss": 0.2017, + "step": 20197, + "teacher_loss": 0.19114097952842712 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.22253045439720154, + "learning_rate": 8.608625406078526e-06, + "loss": 0.1352, + "step": 20198, + "teacher_loss": 0.1254810094833374 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.19349834322929382, + "learning_rate": 8.606570725115068e-06, + "loss": 0.2, + "step": 20199, + "teacher_loss": 0.2007497251033783 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.3512103855609894, + "learning_rate": 8.604516190733786e-06, + "loss": 0.2537, + "step": 20200, + "teacher_loss": 0.24283862113952637 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.23127895593643188, + "learning_rate": 8.602461802981773e-06, + "loss": 0.1694, + "step": 20201, + "teacher_loss": 0.16256925463676453 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.5138158798217773, + "learning_rate": 8.600407561906127e-06, + "loss": 0.1972, + "step": 20202, + "teacher_loss": 0.16204431653022766 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.2443954199552536, + "learning_rate": 8.598353467553946e-06, + "loss": 0.1835, + "step": 20203, + "teacher_loss": 0.17674848437309265 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.7003247737884521, + "learning_rate": 8.596299519972333e-06, + "loss": 0.2857, + "step": 20204, + "teacher_loss": 0.2396685630083084 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.857781708240509, + "learning_rate": 8.594245719208367e-06, + "loss": 0.3947, + "step": 20205, + "teacher_loss": 0.3432343900203705 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.953424870967865, + "learning_rate": 8.59219206530914e-06, + "loss": 0.2608, + "step": 20206, + "teacher_loss": 0.18379944562911987 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.1468905806541443, + "learning_rate": 8.590138558321747e-06, + "loss": 0.1479, + "step": 20207, + "teacher_loss": 0.14801552891731262 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.4537774622440338, + "learning_rate": 8.58808519829325e-06, + "loss": 0.2399, + "step": 20208, + "teacher_loss": 0.2161661982536316 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.4669089913368225, + "learning_rate": 8.586031985270735e-06, + "loss": 0.2345, + "step": 20209, + "teacher_loss": 0.2086581289768219 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.17330892384052277, + "learning_rate": 8.583978919301284e-06, + "loss": 0.1555, + "step": 20210, + "teacher_loss": 0.15353038907051086 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.4137945771217346, + "learning_rate": 8.581926000431954e-06, + "loss": 0.3127, + "step": 20211, + "teacher_loss": 0.30148079991340637 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.8254531621932983, + "learning_rate": 8.579873228709824e-06, + "loss": 0.2637, + "step": 20212, + "teacher_loss": 0.20128926634788513 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.45209255814552307, + "learning_rate": 8.577820604181948e-06, + "loss": 0.302, + "step": 20213, + "teacher_loss": 0.28537318110466003 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.27556315064430237, + "learning_rate": 8.575768126895387e-06, + "loss": 0.1987, + "step": 20214, + "teacher_loss": 0.19011583924293518 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.30866843461990356, + "learning_rate": 8.57371579689721e-06, + "loss": 0.2684, + "step": 20215, + "teacher_loss": 0.2638859152793884 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.4336906671524048, + "learning_rate": 8.571663614234458e-06, + "loss": 0.2219, + "step": 20216, + "teacher_loss": 0.19842275977134705 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.5355225801467896, + "learning_rate": 8.569611578954186e-06, + "loss": 0.2924, + "step": 20217, + "teacher_loss": 0.2653810679912567 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.36329615116119385, + "learning_rate": 8.567559691103447e-06, + "loss": 0.2721, + "step": 20218, + "teacher_loss": 0.26202094554901123 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.46648550033569336, + "learning_rate": 8.56550795072927e-06, + "loss": 0.2471, + "step": 20219, + "teacher_loss": 0.22274476289749146 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.4365149140357971, + "learning_rate": 8.563456357878713e-06, + "loss": 0.1778, + "step": 20220, + "teacher_loss": 0.149072527885437 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.676209568977356, + "learning_rate": 8.561404912598798e-06, + "loss": 0.2441, + "step": 20221, + "teacher_loss": 0.19609782099723816 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.32545363903045654, + "learning_rate": 8.559353614936565e-06, + "loss": 0.2342, + "step": 20222, + "teacher_loss": 0.22407066822052002 + }, + { + "compression_loss": 0.0, + "epoch": 3.65, + "label_loss": 0.3496916890144348, + "learning_rate": 8.557302464939049e-06, + "loss": 0.2225, + "step": 20223, + "teacher_loss": 0.2084149271249771 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.20512907207012177, + "learning_rate": 8.555251462653264e-06, + "loss": 0.1571, + "step": 20224, + "teacher_loss": 0.1517784297466278 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.20934784412384033, + "learning_rate": 8.553200608126247e-06, + "loss": 0.2, + "step": 20225, + "teacher_loss": 0.1989302784204483 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.5861580967903137, + "learning_rate": 8.551149901405006e-06, + "loss": 0.2046, + "step": 20226, + "teacher_loss": 0.16221728920936584 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.7154439687728882, + "learning_rate": 8.549099342536561e-06, + "loss": 0.2822, + "step": 20227, + "teacher_loss": 0.23411481082439423 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.23381400108337402, + "learning_rate": 8.547048931567936e-06, + "loss": 0.2189, + "step": 20228, + "teacher_loss": 0.2172200083732605 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.24642375111579895, + "learning_rate": 8.544998668546124e-06, + "loss": 0.1974, + "step": 20229, + "teacher_loss": 0.19196170568466187 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.39428532123565674, + "learning_rate": 8.542948553518142e-06, + "loss": 0.2247, + "step": 20230, + "teacher_loss": 0.20582188665866852 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.5662950873374939, + "learning_rate": 8.540898586530996e-06, + "loss": 0.3535, + "step": 20231, + "teacher_loss": 0.32986587285995483 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.12203732132911682, + "learning_rate": 8.538848767631672e-06, + "loss": 0.1436, + "step": 20232, + "teacher_loss": 0.14601413905620575 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.3659417927265167, + "learning_rate": 8.536799096867176e-06, + "loss": 0.2323, + "step": 20233, + "teacher_loss": 0.2174283117055893 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.466904878616333, + "learning_rate": 8.534749574284505e-06, + "loss": 0.2377, + "step": 20234, + "teacher_loss": 0.2122471034526825 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.0996318832039833, + "learning_rate": 8.532700199930642e-06, + "loss": 0.132, + "step": 20235, + "teacher_loss": 0.1356450468301773 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.9772903919219971, + "learning_rate": 8.53065097385256e-06, + "loss": 0.3448, + "step": 20236, + "teacher_loss": 0.274472177028656 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.5410850644111633, + "learning_rate": 8.528601896097269e-06, + "loss": 0.1853, + "step": 20237, + "teacher_loss": 0.14581036567687988 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.43765372037887573, + "learning_rate": 8.526552966711734e-06, + "loss": 0.1799, + "step": 20238, + "teacher_loss": 0.1512630134820938 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.44078630208969116, + "learning_rate": 8.524504185742923e-06, + "loss": 0.2372, + "step": 20239, + "teacher_loss": 0.2145601212978363 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.37051481008529663, + "learning_rate": 8.52245555323782e-06, + "loss": 0.2246, + "step": 20240, + "teacher_loss": 0.20836688578128815 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.2678895592689514, + "learning_rate": 8.520407069243395e-06, + "loss": 0.1973, + "step": 20241, + "teacher_loss": 0.18946653604507446 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 1.1230154037475586, + "learning_rate": 8.518358733806602e-06, + "loss": 0.3173, + "step": 20242, + "teacher_loss": 0.22779735922813416 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.7786638736724854, + "learning_rate": 8.516310546974411e-06, + "loss": 0.3261, + "step": 20243, + "teacher_loss": 0.275812029838562 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.37805962562561035, + "learning_rate": 8.514262508793784e-06, + "loss": 0.2483, + "step": 20244, + "teacher_loss": 0.23392270505428314 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.5138403177261353, + "learning_rate": 8.51221461931167e-06, + "loss": 0.2251, + "step": 20245, + "teacher_loss": 0.19301816821098328 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.34679239988327026, + "learning_rate": 8.510166878575022e-06, + "loss": 0.2109, + "step": 20246, + "teacher_loss": 0.19579347968101501 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 1.0638108253479004, + "learning_rate": 8.508119286630795e-06, + "loss": 0.3026, + "step": 20247, + "teacher_loss": 0.2179986536502838 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.4252743721008301, + "learning_rate": 8.506071843525931e-06, + "loss": 0.3248, + "step": 20248, + "teacher_loss": 0.31366756558418274 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.38507816195487976, + "learning_rate": 8.504024549307357e-06, + "loss": 0.4053, + "step": 20249, + "teacher_loss": 0.4074931740760803 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.2570658028125763, + "learning_rate": 8.501977404022034e-06, + "loss": 0.1761, + "step": 20250, + "teacher_loss": 0.16707256436347961 + }, + { + "epoch": 3.66, + "eval_exact_match": 80.2081362346263, + "eval_f1": 87.48776695731692, + "step": 20250 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.18904536962509155, + "learning_rate": 8.499930407716888e-06, + "loss": 0.3088, + "step": 20251, + "teacher_loss": 0.3221076428890228 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.27548471093177795, + "learning_rate": 8.497883560438845e-06, + "loss": 0.2193, + "step": 20252, + "teacher_loss": 0.21304328739643097 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.33199456334114075, + "learning_rate": 8.495836862234837e-06, + "loss": 0.1861, + "step": 20253, + "teacher_loss": 0.1699065864086151 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.4198136031627655, + "learning_rate": 8.493790313151797e-06, + "loss": 0.1786, + "step": 20254, + "teacher_loss": 0.15182146430015564 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.4110315144062042, + "learning_rate": 8.491743913236629e-06, + "loss": 0.1696, + "step": 20255, + "teacher_loss": 0.14281292259693146 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.12331907451152802, + "learning_rate": 8.489697662536264e-06, + "loss": 0.1334, + "step": 20256, + "teacher_loss": 0.13449972867965698 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.2964077591896057, + "learning_rate": 8.487651561097615e-06, + "loss": 0.2314, + "step": 20257, + "teacher_loss": 0.22417324781417847 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.9203104376792908, + "learning_rate": 8.485605608967586e-06, + "loss": 0.546, + "step": 20258, + "teacher_loss": 0.5044133067131042 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.1376548409461975, + "learning_rate": 8.48355980619309e-06, + "loss": 0.2226, + "step": 20259, + "teacher_loss": 0.2320544421672821 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.23093575239181519, + "learning_rate": 8.481514152821037e-06, + "loss": 0.1756, + "step": 20260, + "teacher_loss": 0.16949301958084106 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.8521703481674194, + "learning_rate": 8.479468648898319e-06, + "loss": 0.5114, + "step": 20261, + "teacher_loss": 0.4735168516635895 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.5308355093002319, + "learning_rate": 8.47742329447183e-06, + "loss": 0.2022, + "step": 20262, + "teacher_loss": 0.16567447781562805 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.37594202160835266, + "learning_rate": 8.475378089588471e-06, + "loss": 0.1997, + "step": 20263, + "teacher_loss": 0.18008652329444885 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.4753708243370056, + "learning_rate": 8.473333034295135e-06, + "loss": 0.2977, + "step": 20264, + "teacher_loss": 0.2779938578605652 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.4141441583633423, + "learning_rate": 8.471288128638697e-06, + "loss": 0.3199, + "step": 20265, + "teacher_loss": 0.3094070553779602 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.38325268030166626, + "learning_rate": 8.46924337266605e-06, + "loss": 0.2191, + "step": 20266, + "teacher_loss": 0.20088501274585724 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.4207196831703186, + "learning_rate": 8.467198766424078e-06, + "loss": 0.216, + "step": 20267, + "teacher_loss": 0.19322559237480164 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.1613987684249878, + "learning_rate": 8.465154309959648e-06, + "loss": 0.1847, + "step": 20268, + "teacher_loss": 0.18726277351379395 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.4992298483848572, + "learning_rate": 8.463110003319633e-06, + "loss": 0.254, + "step": 20269, + "teacher_loss": 0.22676478326320648 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.27575981616973877, + "learning_rate": 8.461065846550916e-06, + "loss": 0.2024, + "step": 20270, + "teacher_loss": 0.19425445795059204 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.5447056293487549, + "learning_rate": 8.459021839700346e-06, + "loss": 0.1984, + "step": 20271, + "teacher_loss": 0.15990683436393738 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.40601879358291626, + "learning_rate": 8.456977982814796e-06, + "loss": 0.2415, + "step": 20272, + "teacher_loss": 0.22327515482902527 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.5051358342170715, + "learning_rate": 8.454934275941129e-06, + "loss": 0.3202, + "step": 20273, + "teacher_loss": 0.29966962337493896 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.6076987981796265, + "learning_rate": 8.452890719126193e-06, + "loss": 0.2756, + "step": 20274, + "teacher_loss": 0.23864729702472687 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.7815028429031372, + "learning_rate": 8.450847312416845e-06, + "loss": 0.3498, + "step": 20275, + "teacher_loss": 0.3018571734428406 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.23986394703388214, + "learning_rate": 8.448804055859931e-06, + "loss": 0.1485, + "step": 20276, + "teacher_loss": 0.13837382197380066 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.35530006885528564, + "learning_rate": 8.446760949502296e-06, + "loss": 0.2752, + "step": 20277, + "teacher_loss": 0.26631683111190796 + }, + { + "compression_loss": 0.0, + "epoch": 3.66, + "label_loss": 0.5229827165603638, + "learning_rate": 8.444717993390792e-06, + "loss": 0.2796, + "step": 20278, + "teacher_loss": 0.2525658905506134 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.2837910056114197, + "learning_rate": 8.442675187572242e-06, + "loss": 0.2379, + "step": 20279, + "teacher_loss": 0.23279443383216858 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.6391610503196716, + "learning_rate": 8.440632532093493e-06, + "loss": 0.252, + "step": 20280, + "teacher_loss": 0.20892734825611115 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.34770065546035767, + "learning_rate": 8.43859002700138e-06, + "loss": 0.194, + "step": 20281, + "teacher_loss": 0.17691677808761597 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.7221857309341431, + "learning_rate": 8.436547672342717e-06, + "loss": 0.1891, + "step": 20282, + "teacher_loss": 0.12988263368606567 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.11522579193115234, + "learning_rate": 8.434505468164345e-06, + "loss": 0.1711, + "step": 20283, + "teacher_loss": 0.17727959156036377 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.21450819075107574, + "learning_rate": 8.432463414513072e-06, + "loss": 0.1717, + "step": 20284, + "teacher_loss": 0.16691450774669647 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.366448312997818, + "learning_rate": 8.43042151143573e-06, + "loss": 0.3625, + "step": 20285, + "teacher_loss": 0.36201101541519165 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.6070150136947632, + "learning_rate": 8.428379758979119e-06, + "loss": 0.2725, + "step": 20286, + "teacher_loss": 0.23537677526474 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.5561429262161255, + "learning_rate": 8.426338157190058e-06, + "loss": 0.2657, + "step": 20287, + "teacher_loss": 0.2333860695362091 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.30099374055862427, + "learning_rate": 8.424296706115358e-06, + "loss": 0.3015, + "step": 20288, + "teacher_loss": 0.3015149235725403 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.5226436257362366, + "learning_rate": 8.422255405801817e-06, + "loss": 0.3397, + "step": 20289, + "teacher_loss": 0.3193542957305908 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.32014569640159607, + "learning_rate": 8.420214256296235e-06, + "loss": 0.1942, + "step": 20290, + "teacher_loss": 0.1801832616329193 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.5011449456214905, + "learning_rate": 8.41817325764542e-06, + "loss": 0.24, + "step": 20291, + "teacher_loss": 0.21096271276474 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.33939769864082336, + "learning_rate": 8.416132409896153e-06, + "loss": 0.2753, + "step": 20292, + "teacher_loss": 0.2682017385959625 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.40753042697906494, + "learning_rate": 8.41409171309523e-06, + "loss": 0.2346, + "step": 20293, + "teacher_loss": 0.2153315246105194 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.34742921590805054, + "learning_rate": 8.412051167289447e-06, + "loss": 0.2304, + "step": 20294, + "teacher_loss": 0.21736004948616028 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.4847373366355896, + "learning_rate": 8.410010772525571e-06, + "loss": 0.2578, + "step": 20295, + "teacher_loss": 0.23258140683174133 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.3675590455532074, + "learning_rate": 8.40797052885039e-06, + "loss": 0.2155, + "step": 20296, + "teacher_loss": 0.1986561119556427 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.4584321081638336, + "learning_rate": 8.405930436310688e-06, + "loss": 0.2883, + "step": 20297, + "teacher_loss": 0.2694227695465088 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.27614128589630127, + "learning_rate": 8.40389049495323e-06, + "loss": 0.1981, + "step": 20298, + "teacher_loss": 0.1893869936466217 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.2228301465511322, + "learning_rate": 8.401850704824775e-06, + "loss": 0.1998, + "step": 20299, + "teacher_loss": 0.19723562896251678 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.5884919166564941, + "learning_rate": 8.399811065972112e-06, + "loss": 0.2554, + "step": 20300, + "teacher_loss": 0.21842415630817413 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.37131041288375854, + "learning_rate": 8.397771578441995e-06, + "loss": 0.2327, + "step": 20301, + "teacher_loss": 0.21731264889240265 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.7470121383666992, + "learning_rate": 8.395732242281175e-06, + "loss": 0.2843, + "step": 20302, + "teacher_loss": 0.23289614915847778 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.6012039184570312, + "learning_rate": 8.393693057536412e-06, + "loss": 0.4507, + "step": 20303, + "teacher_loss": 0.43402254581451416 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.689649224281311, + "learning_rate": 8.391654024254465e-06, + "loss": 0.2492, + "step": 20304, + "teacher_loss": 0.2002355009317398 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.14456743001937866, + "learning_rate": 8.389615142482078e-06, + "loss": 0.1452, + "step": 20305, + "teacher_loss": 0.14532293379306793 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.1396913081407547, + "learning_rate": 8.387576412265994e-06, + "loss": 0.1611, + "step": 20306, + "teacher_loss": 0.1634686291217804 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.28904545307159424, + "learning_rate": 8.385537833652963e-06, + "loss": 0.2849, + "step": 20307, + "teacher_loss": 0.2844006419181824 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.2781648337841034, + "learning_rate": 8.383499406689718e-06, + "loss": 0.2269, + "step": 20308, + "teacher_loss": 0.22123923897743225 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.6340066194534302, + "learning_rate": 8.381461131422984e-06, + "loss": 0.2394, + "step": 20309, + "teacher_loss": 0.1955721378326416 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.33694514632225037, + "learning_rate": 8.379423007899513e-06, + "loss": 0.2403, + "step": 20310, + "teacher_loss": 0.22953510284423828 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.5375656485557556, + "learning_rate": 8.377385036166024e-06, + "loss": 0.4538, + "step": 20311, + "teacher_loss": 0.4444499611854553 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.26863205432891846, + "learning_rate": 8.375347216269236e-06, + "loss": 0.1569, + "step": 20312, + "teacher_loss": 0.14447666704654694 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.2612832188606262, + "learning_rate": 8.373309548255871e-06, + "loss": 0.2026, + "step": 20313, + "teacher_loss": 0.19604083895683289 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.5189799070358276, + "learning_rate": 8.371272032172658e-06, + "loss": 0.2403, + "step": 20314, + "teacher_loss": 0.20935894548892975 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.4244531989097595, + "learning_rate": 8.369234668066301e-06, + "loss": 0.2819, + "step": 20315, + "teacher_loss": 0.266079843044281 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.2923707067966461, + "learning_rate": 8.36719745598351e-06, + "loss": 0.2307, + "step": 20316, + "teacher_loss": 0.22387002408504486 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.288173645734787, + "learning_rate": 8.365160395970997e-06, + "loss": 0.2125, + "step": 20317, + "teacher_loss": 0.20408092439174652 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.3977069854736328, + "learning_rate": 8.363123488075464e-06, + "loss": 0.3739, + "step": 20318, + "teacher_loss": 0.37125885486602783 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 1.0839234590530396, + "learning_rate": 8.361086732343607e-06, + "loss": 0.3195, + "step": 20319, + "teacher_loss": 0.23459284007549286 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.21802380681037903, + "learning_rate": 8.359050128822135e-06, + "loss": 0.247, + "step": 20320, + "teacher_loss": 0.25027501583099365 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.3903188109397888, + "learning_rate": 8.357013677557725e-06, + "loss": 0.239, + "step": 20321, + "teacher_loss": 0.22220875322818756 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.5343143343925476, + "learning_rate": 8.354977378597077e-06, + "loss": 0.2601, + "step": 20322, + "teacher_loss": 0.2296162247657776 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.8922644853591919, + "learning_rate": 8.352941231986881e-06, + "loss": 0.4402, + "step": 20323, + "teacher_loss": 0.3899794816970825 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.33488988876342773, + "learning_rate": 8.35090523777381e-06, + "loss": 0.1856, + "step": 20324, + "teacher_loss": 0.16898152232170105 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.3360133171081543, + "learning_rate": 8.348869396004545e-06, + "loss": 0.1502, + "step": 20325, + "teacher_loss": 0.1295267939567566 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.418695867061615, + "learning_rate": 8.346833706725763e-06, + "loss": 0.2011, + "step": 20326, + "teacher_loss": 0.17686712741851807 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.14462333917617798, + "learning_rate": 8.34479816998414e-06, + "loss": 0.1617, + "step": 20327, + "teacher_loss": 0.16357830166816711 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.48511573672294617, + "learning_rate": 8.342762785826338e-06, + "loss": 0.2382, + "step": 20328, + "teacher_loss": 0.21080774068832397 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.4053296446800232, + "learning_rate": 8.340727554299025e-06, + "loss": 0.2342, + "step": 20329, + "teacher_loss": 0.21518591046333313 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.2639651894569397, + "learning_rate": 8.33869247544887e-06, + "loss": 0.1804, + "step": 20330, + "teacher_loss": 0.1710660755634308 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.4180266261100769, + "learning_rate": 8.33665754932252e-06, + "loss": 0.2205, + "step": 20331, + "teacher_loss": 0.19860179722309113 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.6020634174346924, + "learning_rate": 8.334622775966634e-06, + "loss": 0.2835, + "step": 20332, + "teacher_loss": 0.24805109202861786 + }, + { + "compression_loss": 0.0, + "epoch": 3.67, + "label_loss": 0.8178966045379639, + "learning_rate": 8.332588155427869e-06, + "loss": 0.2442, + "step": 20333, + "teacher_loss": 0.18041634559631348 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.24088552594184875, + "learning_rate": 8.330553687752861e-06, + "loss": 0.1513, + "step": 20334, + "teacher_loss": 0.14134082198143005 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.2003917694091797, + "learning_rate": 8.328519372988268e-06, + "loss": 0.1816, + "step": 20335, + "teacher_loss": 0.17948219180107117 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.2967814803123474, + "learning_rate": 8.32648521118072e-06, + "loss": 0.2628, + "step": 20336, + "teacher_loss": 0.25899559259414673 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.2348853200674057, + "learning_rate": 8.324451202376855e-06, + "loss": 0.2041, + "step": 20337, + "teacher_loss": 0.20064151287078857 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.21910922229290009, + "learning_rate": 8.322417346623317e-06, + "loss": 0.2039, + "step": 20338, + "teacher_loss": 0.20220354199409485 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.479958176612854, + "learning_rate": 8.320383643966721e-06, + "loss": 0.3048, + "step": 20339, + "teacher_loss": 0.2853829264640808 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.5123432874679565, + "learning_rate": 8.318350094453702e-06, + "loss": 0.2228, + "step": 20340, + "teacher_loss": 0.19068023562431335 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.30526140332221985, + "learning_rate": 8.31631669813089e-06, + "loss": 0.1997, + "step": 20341, + "teacher_loss": 0.18794922530651093 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.30018049478530884, + "learning_rate": 8.31428345504489e-06, + "loss": 0.2734, + "step": 20342, + "teacher_loss": 0.27042222023010254 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.6939787864685059, + "learning_rate": 8.312250365242331e-06, + "loss": 0.439, + "step": 20343, + "teacher_loss": 0.4106296896934509 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.5731590986251831, + "learning_rate": 8.310217428769816e-06, + "loss": 0.2604, + "step": 20344, + "teacher_loss": 0.22563323378562927 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.6227819919586182, + "learning_rate": 8.308184645673958e-06, + "loss": 0.3086, + "step": 20345, + "teacher_loss": 0.27365371584892273 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.2726311981678009, + "learning_rate": 8.306152016001368e-06, + "loss": 0.2262, + "step": 20346, + "teacher_loss": 0.2210034281015396 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.2026922106742859, + "learning_rate": 8.304119539798638e-06, + "loss": 0.177, + "step": 20347, + "teacher_loss": 0.17410127818584442 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.261749804019928, + "learning_rate": 8.302087217112377e-06, + "loss": 0.1814, + "step": 20348, + "teacher_loss": 0.17242054641246796 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.4985075891017914, + "learning_rate": 8.30005504798917e-06, + "loss": 0.2441, + "step": 20349, + "teacher_loss": 0.21580630540847778 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.7892706990242004, + "learning_rate": 8.298023032475613e-06, + "loss": 0.2459, + "step": 20350, + "teacher_loss": 0.18557390570640564 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.27995237708091736, + "learning_rate": 8.2959911706183e-06, + "loss": 0.183, + "step": 20351, + "teacher_loss": 0.1721746325492859 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.30418139696121216, + "learning_rate": 8.293959462463802e-06, + "loss": 0.1888, + "step": 20352, + "teacher_loss": 0.17603248357772827 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.46759098768234253, + "learning_rate": 8.29192790805871e-06, + "loss": 0.2522, + "step": 20353, + "teacher_loss": 0.22831980884075165 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.7757827043533325, + "learning_rate": 8.289896507449604e-06, + "loss": 0.3346, + "step": 20354, + "teacher_loss": 0.2855909466743469 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.5297353267669678, + "learning_rate": 8.28786526068305e-06, + "loss": 0.4594, + "step": 20355, + "teacher_loss": 0.45154690742492676 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.6049448251724243, + "learning_rate": 8.285834167805617e-06, + "loss": 0.3056, + "step": 20356, + "teacher_loss": 0.2723514437675476 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.550566554069519, + "learning_rate": 8.283803228863886e-06, + "loss": 0.288, + "step": 20357, + "teacher_loss": 0.25881457328796387 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.4555302858352661, + "learning_rate": 8.281772443904409e-06, + "loss": 0.2217, + "step": 20358, + "teacher_loss": 0.19570600986480713 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.42996734380722046, + "learning_rate": 8.279741812973736e-06, + "loss": 0.2497, + "step": 20359, + "teacher_loss": 0.22970044612884521 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.5414090156555176, + "learning_rate": 8.277711336118446e-06, + "loss": 0.1725, + "step": 20360, + "teacher_loss": 0.13145697116851807 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.3630410432815552, + "learning_rate": 8.275681013385083e-06, + "loss": 0.2105, + "step": 20361, + "teacher_loss": 0.19349659979343414 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.44658923149108887, + "learning_rate": 8.27365084482018e-06, + "loss": 0.2628, + "step": 20362, + "teacher_loss": 0.24239778518676758 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.21085229516029358, + "learning_rate": 8.271620830470308e-06, + "loss": 0.188, + "step": 20363, + "teacher_loss": 0.18547658622264862 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.5636534690856934, + "learning_rate": 8.269590970381999e-06, + "loss": 0.2355, + "step": 20364, + "teacher_loss": 0.19901852309703827 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.26244986057281494, + "learning_rate": 8.267561264601783e-06, + "loss": 0.1651, + "step": 20365, + "teacher_loss": 0.15428493916988373 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.4168131351470947, + "learning_rate": 8.265531713176204e-06, + "loss": 0.2612, + "step": 20366, + "teacher_loss": 0.24396264553070068 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.638247013092041, + "learning_rate": 8.263502316151797e-06, + "loss": 0.4886, + "step": 20367, + "teacher_loss": 0.47201007604599 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.39159074425697327, + "learning_rate": 8.261473073575076e-06, + "loss": 0.2228, + "step": 20368, + "teacher_loss": 0.20407122373580933 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.4306749999523163, + "learning_rate": 8.259443985492577e-06, + "loss": 0.3133, + "step": 20369, + "teacher_loss": 0.300204336643219 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.2095477432012558, + "learning_rate": 8.257415051950825e-06, + "loss": 0.1812, + "step": 20370, + "teacher_loss": 0.17806529998779297 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.5440797209739685, + "learning_rate": 8.25538627299633e-06, + "loss": 0.2741, + "step": 20371, + "teacher_loss": 0.2441214770078659 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.7079646587371826, + "learning_rate": 8.253357648675594e-06, + "loss": 0.2744, + "step": 20372, + "teacher_loss": 0.22624942660331726 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.46335119009017944, + "learning_rate": 8.251329179035151e-06, + "loss": 0.2922, + "step": 20373, + "teacher_loss": 0.2731907069683075 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.32143378257751465, + "learning_rate": 8.249300864121497e-06, + "loss": 0.2561, + "step": 20374, + "teacher_loss": 0.24881336092948914 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.29442209005355835, + "learning_rate": 8.247272703981127e-06, + "loss": 0.3145, + "step": 20375, + "teacher_loss": 0.3166942000389099 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.4065555930137634, + "learning_rate": 8.245244698660553e-06, + "loss": 0.2559, + "step": 20376, + "teacher_loss": 0.23910623788833618 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.23264452815055847, + "learning_rate": 8.243216848206267e-06, + "loss": 0.1833, + "step": 20377, + "teacher_loss": 0.17780563235282898 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.24286848306655884, + "learning_rate": 8.241189152664756e-06, + "loss": 0.2228, + "step": 20378, + "teacher_loss": 0.22058901190757751 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.47323596477508545, + "learning_rate": 8.239161612082515e-06, + "loss": 0.2998, + "step": 20379, + "teacher_loss": 0.2805844843387604 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.25970566272735596, + "learning_rate": 8.237134226506033e-06, + "loss": 0.1895, + "step": 20380, + "teacher_loss": 0.18165087699890137 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 1.0713167190551758, + "learning_rate": 8.235106995981783e-06, + "loss": 0.273, + "step": 20381, + "teacher_loss": 0.1843479573726654 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.2944996953010559, + "learning_rate": 8.233079920556246e-06, + "loss": 0.2429, + "step": 20382, + "teacher_loss": 0.23720312118530273 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.2966660261154175, + "learning_rate": 8.231053000275904e-06, + "loss": 0.2405, + "step": 20383, + "teacher_loss": 0.23430955410003662 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.4842275083065033, + "learning_rate": 8.229026235187215e-06, + "loss": 0.1763, + "step": 20384, + "teacher_loss": 0.14207223057746887 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.3229365348815918, + "learning_rate": 8.226999625336663e-06, + "loss": 0.2425, + "step": 20385, + "teacher_loss": 0.2335261106491089 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.3411158323287964, + "learning_rate": 8.224973170770695e-06, + "loss": 0.2045, + "step": 20386, + "teacher_loss": 0.1892724335193634 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.782532811164856, + "learning_rate": 8.222946871535786e-06, + "loss": 0.3481, + "step": 20387, + "teacher_loss": 0.2998234033584595 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.7450612783432007, + "learning_rate": 8.220920727678383e-06, + "loss": 0.344, + "step": 20388, + "teacher_loss": 0.2994205355644226 + }, + { + "compression_loss": 0.0, + "epoch": 3.68, + "label_loss": 0.1958162784576416, + "learning_rate": 8.218894739244939e-06, + "loss": 0.1665, + "step": 20389, + "teacher_loss": 0.16325756907463074 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.3126941919326782, + "learning_rate": 8.216868906281918e-06, + "loss": 0.229, + "step": 20390, + "teacher_loss": 0.21969658136367798 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.22135841846466064, + "learning_rate": 8.214843228835746e-06, + "loss": 0.1789, + "step": 20391, + "teacher_loss": 0.17416563630104065 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.31070998311042786, + "learning_rate": 8.212817706952878e-06, + "loss": 0.2127, + "step": 20392, + "teacher_loss": 0.20176814496517181 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.4560564458370209, + "learning_rate": 8.210792340679756e-06, + "loss": 0.2079, + "step": 20393, + "teacher_loss": 0.18028277158737183 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.6389520764350891, + "learning_rate": 8.208767130062805e-06, + "loss": 0.3146, + "step": 20394, + "teacher_loss": 0.278572142124176 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.4027354419231415, + "learning_rate": 8.206742075148462e-06, + "loss": 0.2546, + "step": 20395, + "teacher_loss": 0.2381429523229599 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.3905818462371826, + "learning_rate": 8.204717175983163e-06, + "loss": 0.304, + "step": 20396, + "teacher_loss": 0.2943894565105438 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.28477054834365845, + "learning_rate": 8.20269243261332e-06, + "loss": 0.1656, + "step": 20397, + "teacher_loss": 0.15231354534626007 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.6624535322189331, + "learning_rate": 8.200667845085365e-06, + "loss": 0.3406, + "step": 20398, + "teacher_loss": 0.30482351779937744 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.38137638568878174, + "learning_rate": 8.198643413445705e-06, + "loss": 0.1973, + "step": 20399, + "teacher_loss": 0.17686955630779266 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.3406970500946045, + "learning_rate": 8.19661913774076e-06, + "loss": 0.2355, + "step": 20400, + "teacher_loss": 0.22382305562496185 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.7200182676315308, + "learning_rate": 8.194595018016949e-06, + "loss": 0.3264, + "step": 20401, + "teacher_loss": 0.2826971709728241 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.3059428930282593, + "learning_rate": 8.19257105432066e-06, + "loss": 0.3012, + "step": 20402, + "teacher_loss": 0.3006381392478943 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.3222476541996002, + "learning_rate": 8.190547246698311e-06, + "loss": 0.1706, + "step": 20403, + "teacher_loss": 0.1537092924118042 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.5360685586929321, + "learning_rate": 8.1885235951963e-06, + "loss": 0.1937, + "step": 20404, + "teacher_loss": 0.1556239277124405 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.3255910873413086, + "learning_rate": 8.186500099861019e-06, + "loss": 0.2305, + "step": 20405, + "teacher_loss": 0.21998965740203857 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.37914547324180603, + "learning_rate": 8.184476760738867e-06, + "loss": 0.2086, + "step": 20406, + "teacher_loss": 0.18962402641773224 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.49698537588119507, + "learning_rate": 8.182453577876224e-06, + "loss": 0.2056, + "step": 20407, + "teacher_loss": 0.17320981621742249 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.13500294089317322, + "learning_rate": 8.180430551319486e-06, + "loss": 0.1808, + "step": 20408, + "teacher_loss": 0.18585175275802612 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.12996836006641388, + "learning_rate": 8.178407681115023e-06, + "loss": 0.1603, + "step": 20409, + "teacher_loss": 0.16366402804851532 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.4867998957633972, + "learning_rate": 8.176384967309218e-06, + "loss": 0.2598, + "step": 20410, + "teacher_loss": 0.23454934358596802 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.42236441373825073, + "learning_rate": 8.174362409948456e-06, + "loss": 0.2816, + "step": 20411, + "teacher_loss": 0.26599812507629395 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.3552388846874237, + "learning_rate": 8.172340009079091e-06, + "loss": 0.2534, + "step": 20412, + "teacher_loss": 0.2420380711555481 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.4141189157962799, + "learning_rate": 8.170317764747501e-06, + "loss": 0.2162, + "step": 20413, + "teacher_loss": 0.19424453377723694 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.2065640091896057, + "learning_rate": 8.168295677000054e-06, + "loss": 0.151, + "step": 20414, + "teacher_loss": 0.14484231173992157 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.6622151136398315, + "learning_rate": 8.166273745883098e-06, + "loss": 0.3225, + "step": 20415, + "teacher_loss": 0.28471681475639343 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.42239901423454285, + "learning_rate": 8.164251971442997e-06, + "loss": 0.2291, + "step": 20416, + "teacher_loss": 0.2076021432876587 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.42003461718559265, + "learning_rate": 8.16223035372611e-06, + "loss": 0.2675, + "step": 20417, + "teacher_loss": 0.25051814317703247 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.3556000590324402, + "learning_rate": 8.160208892778775e-06, + "loss": 0.2581, + "step": 20418, + "teacher_loss": 0.2472292184829712 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.7390826940536499, + "learning_rate": 8.158187588647341e-06, + "loss": 0.2276, + "step": 20419, + "teacher_loss": 0.17079684138298035 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.3526287078857422, + "learning_rate": 8.156166441378161e-06, + "loss": 0.2206, + "step": 20420, + "teacher_loss": 0.20597627758979797 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.47965970635414124, + "learning_rate": 8.154145451017565e-06, + "loss": 0.1932, + "step": 20421, + "teacher_loss": 0.16135311126708984 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.5041465163230896, + "learning_rate": 8.152124617611876e-06, + "loss": 0.2461, + "step": 20422, + "teacher_loss": 0.21740445494651794 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.20443934202194214, + "learning_rate": 8.150103941207451e-06, + "loss": 0.1898, + "step": 20423, + "teacher_loss": 0.1882147490978241 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.4370315670967102, + "learning_rate": 8.148083421850606e-06, + "loss": 0.2358, + "step": 20424, + "teacher_loss": 0.2134111523628235 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.49659407138824463, + "learning_rate": 8.146063059587657e-06, + "loss": 0.314, + "step": 20425, + "teacher_loss": 0.29369789361953735 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.7438884973526001, + "learning_rate": 8.144042854464936e-06, + "loss": 0.347, + "step": 20426, + "teacher_loss": 0.30293387174606323 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.44723615050315857, + "learning_rate": 8.142022806528761e-06, + "loss": 0.2054, + "step": 20427, + "teacher_loss": 0.17856638133525848 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.3505808115005493, + "learning_rate": 8.140002915825437e-06, + "loss": 0.2015, + "step": 20428, + "teacher_loss": 0.18493297696113586 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.22884199023246765, + "learning_rate": 8.137983182401279e-06, + "loss": 0.1508, + "step": 20429, + "teacher_loss": 0.14209480583667755 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.4753621816635132, + "learning_rate": 8.135963606302597e-06, + "loss": 0.2926, + "step": 20430, + "teacher_loss": 0.2723028361797333 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.30431801080703735, + "learning_rate": 8.13394418757569e-06, + "loss": 0.2518, + "step": 20431, + "teacher_loss": 0.24599260091781616 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.9572106003761292, + "learning_rate": 8.131924926266848e-06, + "loss": 0.3521, + "step": 20432, + "teacher_loss": 0.2849164605140686 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.2716759443283081, + "learning_rate": 8.129905822422388e-06, + "loss": 0.2579, + "step": 20433, + "teacher_loss": 0.25639498233795166 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.28855806589126587, + "learning_rate": 8.127886876088589e-06, + "loss": 0.1704, + "step": 20434, + "teacher_loss": 0.15729355812072754 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.3208698630332947, + "learning_rate": 8.125868087311731e-06, + "loss": 0.1982, + "step": 20435, + "teacher_loss": 0.18455222249031067 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.5896419882774353, + "learning_rate": 8.123849456138114e-06, + "loss": 0.2337, + "step": 20436, + "teacher_loss": 0.1941101849079132 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.6188450455665588, + "learning_rate": 8.121830982614014e-06, + "loss": 0.5363, + "step": 20437, + "teacher_loss": 0.527134895324707 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.8942652940750122, + "learning_rate": 8.119812666785704e-06, + "loss": 0.2672, + "step": 20438, + "teacher_loss": 0.19750502705574036 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.47525739669799805, + "learning_rate": 8.117794508699462e-06, + "loss": 0.24, + "step": 20439, + "teacher_loss": 0.21380913257598877 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.11387049406766891, + "learning_rate": 8.115776508401564e-06, + "loss": 0.187, + "step": 20440, + "teacher_loss": 0.19512397050857544 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.36860546469688416, + "learning_rate": 8.113758665938264e-06, + "loss": 0.1866, + "step": 20441, + "teacher_loss": 0.16636237502098083 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.7275445461273193, + "learning_rate": 8.111740981355834e-06, + "loss": 0.2781, + "step": 20442, + "teacher_loss": 0.2282119244337082 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.6481698155403137, + "learning_rate": 8.109723454700533e-06, + "loss": 0.2839, + "step": 20443, + "teacher_loss": 0.24345257878303528 + }, + { + "compression_loss": 0.0, + "epoch": 3.69, + "label_loss": 0.23270158469676971, + "learning_rate": 8.10770608601861e-06, + "loss": 0.1955, + "step": 20444, + "teacher_loss": 0.19136837124824524 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.3991568386554718, + "learning_rate": 8.105688875356324e-06, + "loss": 0.2221, + "step": 20445, + "teacher_loss": 0.2023891657590866 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.2909599840641022, + "learning_rate": 8.103671822759928e-06, + "loss": 0.2311, + "step": 20446, + "teacher_loss": 0.22443795204162598 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.3990752398967743, + "learning_rate": 8.10165492827566e-06, + "loss": 0.3091, + "step": 20447, + "teacher_loss": 0.29912543296813965 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.2488056868314743, + "learning_rate": 8.099638191949758e-06, + "loss": 0.1994, + "step": 20448, + "teacher_loss": 0.19393882155418396 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.41436147689819336, + "learning_rate": 8.097621613828462e-06, + "loss": 0.2019, + "step": 20449, + "teacher_loss": 0.1782858520746231 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.32092559337615967, + "learning_rate": 8.095605193958013e-06, + "loss": 0.1798, + "step": 20450, + "teacher_loss": 0.16414344310760498 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.23810219764709473, + "learning_rate": 8.093588932384631e-06, + "loss": 0.2024, + "step": 20451, + "teacher_loss": 0.19839242100715637 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.4326157867908478, + "learning_rate": 8.09157282915455e-06, + "loss": 0.3352, + "step": 20452, + "teacher_loss": 0.32442739605903625 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.68401700258255, + "learning_rate": 8.089556884313997e-06, + "loss": 0.1915, + "step": 20453, + "teacher_loss": 0.13678179681301117 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.260090708732605, + "learning_rate": 8.08754109790918e-06, + "loss": 0.1551, + "step": 20454, + "teacher_loss": 0.14346373081207275 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.5561800003051758, + "learning_rate": 8.085525469986321e-06, + "loss": 0.2263, + "step": 20455, + "teacher_loss": 0.1896296739578247 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.37078726291656494, + "learning_rate": 8.083510000591638e-06, + "loss": 0.38, + "step": 20456, + "teacher_loss": 0.3810608386993408 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.5847737789154053, + "learning_rate": 8.081494689771325e-06, + "loss": 0.2577, + "step": 20457, + "teacher_loss": 0.22139260172843933 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.2347414791584015, + "learning_rate": 8.079479537571608e-06, + "loss": 0.1842, + "step": 20458, + "teacher_loss": 0.17853516340255737 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.46124717593193054, + "learning_rate": 8.077464544038666e-06, + "loss": 0.3103, + "step": 20459, + "teacher_loss": 0.29348236322402954 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.7428712844848633, + "learning_rate": 8.07544970921871e-06, + "loss": 0.338, + "step": 20460, + "teacher_loss": 0.29296165704727173 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.5688939690589905, + "learning_rate": 8.073435033157934e-06, + "loss": 0.2441, + "step": 20461, + "teacher_loss": 0.20806053280830383 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.43966642022132874, + "learning_rate": 8.071420515902522e-06, + "loss": 0.2762, + "step": 20462, + "teacher_loss": 0.25801873207092285 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.3009549677371979, + "learning_rate": 8.069406157498664e-06, + "loss": 0.1771, + "step": 20463, + "teacher_loss": 0.16330209374427795 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.6500614881515503, + "learning_rate": 8.067391957992551e-06, + "loss": 0.3557, + "step": 20464, + "teacher_loss": 0.3230045437812805 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.19565460085868835, + "learning_rate": 8.065377917430348e-06, + "loss": 0.2492, + "step": 20465, + "teacher_loss": 0.2551359534263611 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.3056241571903229, + "learning_rate": 8.063364035858238e-06, + "loss": 0.2543, + "step": 20466, + "teacher_loss": 0.24858754873275757 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.2654286026954651, + "learning_rate": 8.0613503133224e-06, + "loss": 0.1768, + "step": 20467, + "teacher_loss": 0.16696037352085114 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.7197321653366089, + "learning_rate": 8.059336749868991e-06, + "loss": 0.28, + "step": 20468, + "teacher_loss": 0.23116309940814972 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.494012713432312, + "learning_rate": 8.057323345544185e-06, + "loss": 0.2069, + "step": 20469, + "teacher_loss": 0.17501267790794373 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.9360468983650208, + "learning_rate": 8.055310100394136e-06, + "loss": 0.2705, + "step": 20470, + "teacher_loss": 0.196604922413826 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.4719622731208801, + "learning_rate": 8.053297014465008e-06, + "loss": 0.1503, + "step": 20471, + "teacher_loss": 0.1146031841635704 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.3433011472225189, + "learning_rate": 8.051284087802949e-06, + "loss": 0.1677, + "step": 20472, + "teacher_loss": 0.14820224046707153 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.881848931312561, + "learning_rate": 8.049271320454114e-06, + "loss": 0.2858, + "step": 20473, + "teacher_loss": 0.21952764689922333 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.5663677453994751, + "learning_rate": 8.047258712464652e-06, + "loss": 0.3023, + "step": 20474, + "teacher_loss": 0.272938996553421 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.4826279878616333, + "learning_rate": 8.045246263880695e-06, + "loss": 0.3438, + "step": 20475, + "teacher_loss": 0.32841426134109497 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.4675447940826416, + "learning_rate": 8.043233974748392e-06, + "loss": 0.22, + "step": 20476, + "teacher_loss": 0.1925058513879776 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.4167604148387909, + "learning_rate": 8.04122184511388e-06, + "loss": 0.2743, + "step": 20477, + "teacher_loss": 0.25842082500457764 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 1.0200319290161133, + "learning_rate": 8.039209875023285e-06, + "loss": 0.2375, + "step": 20478, + "teacher_loss": 0.15051937103271484 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.601428747177124, + "learning_rate": 8.037198064522734e-06, + "loss": 0.2172, + "step": 20479, + "teacher_loss": 0.17456106841564178 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.6416711807250977, + "learning_rate": 8.035186413658364e-06, + "loss": 0.2504, + "step": 20480, + "teacher_loss": 0.20696741342544556 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.333924263715744, + "learning_rate": 8.033174922476288e-06, + "loss": 0.1863, + "step": 20481, + "teacher_loss": 0.1699182242155075 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 1.2532771825790405, + "learning_rate": 8.031163591022613e-06, + "loss": 0.4716, + "step": 20482, + "teacher_loss": 0.38480043411254883 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.4358372092247009, + "learning_rate": 8.029152419343472e-06, + "loss": 0.2429, + "step": 20483, + "teacher_loss": 0.22141847014427185 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.35643208026885986, + "learning_rate": 8.02714140748497e-06, + "loss": 0.2264, + "step": 20484, + "teacher_loss": 0.21195971965789795 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.4319394826889038, + "learning_rate": 8.025130555493195e-06, + "loss": 0.2865, + "step": 20485, + "teacher_loss": 0.270307332277298 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.25577813386917114, + "learning_rate": 8.023119863414276e-06, + "loss": 0.1547, + "step": 20486, + "teacher_loss": 0.14348259568214417 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.39948272705078125, + "learning_rate": 8.021109331294303e-06, + "loss": 0.2236, + "step": 20487, + "teacher_loss": 0.20410682260990143 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.5584471225738525, + "learning_rate": 8.019098959179361e-06, + "loss": 0.2196, + "step": 20488, + "teacher_loss": 0.1819203794002533 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.15208375453948975, + "learning_rate": 8.017088747115554e-06, + "loss": 0.2366, + "step": 20489, + "teacher_loss": 0.2459491640329361 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.5692252516746521, + "learning_rate": 8.015078695148968e-06, + "loss": 0.3436, + "step": 20490, + "teacher_loss": 0.31855615973472595 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.47208714485168457, + "learning_rate": 8.01306880332568e-06, + "loss": 0.1907, + "step": 20491, + "teacher_loss": 0.15944108366966248 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.395516961812973, + "learning_rate": 8.011059071691779e-06, + "loss": 0.2957, + "step": 20492, + "teacher_loss": 0.2845984697341919 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.3867572546005249, + "learning_rate": 8.009049500293344e-06, + "loss": 0.2827, + "step": 20493, + "teacher_loss": 0.271151602268219 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.4019503891468048, + "learning_rate": 8.007040089176443e-06, + "loss": 0.2116, + "step": 20494, + "teacher_loss": 0.19040986895561218 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.32331639528274536, + "learning_rate": 8.005030838387137e-06, + "loss": 0.2701, + "step": 20495, + "teacher_loss": 0.26420462131500244 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.5309414863586426, + "learning_rate": 8.003021747971513e-06, + "loss": 0.2136, + "step": 20496, + "teacher_loss": 0.17832162976264954 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.6562752723693848, + "learning_rate": 8.001012817975622e-06, + "loss": 0.3282, + "step": 20497, + "teacher_loss": 0.2917705774307251 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.3987181782722473, + "learning_rate": 7.999004048445518e-06, + "loss": 0.1818, + "step": 20498, + "teacher_loss": 0.15769723057746887 + }, + { + "compression_loss": 0.0, + "epoch": 3.7, + "label_loss": 0.2560056447982788, + "learning_rate": 7.996995439427259e-06, + "loss": 0.2161, + "step": 20499, + "teacher_loss": 0.21162426471710205 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.25209033489227295, + "learning_rate": 7.994986990966905e-06, + "loss": 0.2251, + "step": 20500, + "teacher_loss": 0.2221122682094574 + }, + { + "epoch": 3.71, + "eval_exact_match": 80.34058656575213, + "eval_f1": 87.61933188818082, + "step": 20500 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.29122698307037354, + "learning_rate": 7.992978703110492e-06, + "loss": 0.2498, + "step": 20501, + "teacher_loss": 0.2451673299074173 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.6233056783676147, + "learning_rate": 7.99097057590407e-06, + "loss": 0.2651, + "step": 20502, + "teacher_loss": 0.22530491650104523 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.39383214712142944, + "learning_rate": 7.988962609393682e-06, + "loss": 0.2576, + "step": 20503, + "teacher_loss": 0.24242591857910156 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.5649991035461426, + "learning_rate": 7.986954803625357e-06, + "loss": 0.2137, + "step": 20504, + "teacher_loss": 0.1746564507484436 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.5237628221511841, + "learning_rate": 7.984947158645131e-06, + "loss": 0.236, + "step": 20505, + "teacher_loss": 0.20397555828094482 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.34743866324424744, + "learning_rate": 7.982939674499042e-06, + "loss": 0.1408, + "step": 20506, + "teacher_loss": 0.11787965893745422 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.45824772119522095, + "learning_rate": 7.980932351233102e-06, + "loss": 0.2294, + "step": 20507, + "teacher_loss": 0.20394228398799896 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.3159356713294983, + "learning_rate": 7.978925188893344e-06, + "loss": 0.1924, + "step": 20508, + "teacher_loss": 0.17868517339229584 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.747072696685791, + "learning_rate": 7.976918187525775e-06, + "loss": 0.3485, + "step": 20509, + "teacher_loss": 0.304210364818573 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.3645329773426056, + "learning_rate": 7.974911347176422e-06, + "loss": 0.3194, + "step": 20510, + "teacher_loss": 0.3144038915634155 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.2930063009262085, + "learning_rate": 7.972904667891285e-06, + "loss": 0.205, + "step": 20511, + "teacher_loss": 0.19519363343715668 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 1.087515115737915, + "learning_rate": 7.970898149716375e-06, + "loss": 0.2569, + "step": 20512, + "teacher_loss": 0.16464795172214508 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.32083261013031006, + "learning_rate": 7.9688917926977e-06, + "loss": 0.1657, + "step": 20513, + "teacher_loss": 0.14846599102020264 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.6017273664474487, + "learning_rate": 7.966885596881252e-06, + "loss": 0.2634, + "step": 20514, + "teacher_loss": 0.22584998607635498 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.3221001923084259, + "learning_rate": 7.964879562313029e-06, + "loss": 0.3057, + "step": 20515, + "teacher_loss": 0.303842157125473 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.33737727999687195, + "learning_rate": 7.962873689039033e-06, + "loss": 0.207, + "step": 20516, + "teacher_loss": 0.19256901741027832 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.20019355416297913, + "learning_rate": 7.96086797710524e-06, + "loss": 0.1745, + "step": 20517, + "teacher_loss": 0.1716061234474182 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.6021059155464172, + "learning_rate": 7.958862426557636e-06, + "loss": 0.253, + "step": 20518, + "teacher_loss": 0.21420764923095703 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.29106423258781433, + "learning_rate": 7.956857037442215e-06, + "loss": 0.2019, + "step": 20519, + "teacher_loss": 0.192010298371315 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.5370165109634399, + "learning_rate": 7.954851809804938e-06, + "loss": 0.2338, + "step": 20520, + "teacher_loss": 0.2000667005777359 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.214020237326622, + "learning_rate": 7.952846743691794e-06, + "loss": 0.1847, + "step": 20521, + "teacher_loss": 0.18139652907848358 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.40170636773109436, + "learning_rate": 7.95084183914874e-06, + "loss": 0.2252, + "step": 20522, + "teacher_loss": 0.2055548131465912 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.701139509677887, + "learning_rate": 7.948837096221747e-06, + "loss": 0.2793, + "step": 20523, + "teacher_loss": 0.23241330683231354 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.4164860248565674, + "learning_rate": 7.946832514956785e-06, + "loss": 0.2416, + "step": 20524, + "teacher_loss": 0.2222120463848114 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.6345788240432739, + "learning_rate": 7.944828095399802e-06, + "loss": 0.3216, + "step": 20525, + "teacher_loss": 0.2868001461029053 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.38317978382110596, + "learning_rate": 7.942823837596757e-06, + "loss": 0.2723, + "step": 20526, + "teacher_loss": 0.2599836587905884 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.6318536400794983, + "learning_rate": 7.94081974159361e-06, + "loss": 0.3076, + "step": 20527, + "teacher_loss": 0.2715749740600586 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.29888448119163513, + "learning_rate": 7.938815807436294e-06, + "loss": 0.2304, + "step": 20528, + "teacher_loss": 0.2227899730205536 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.2001003623008728, + "learning_rate": 7.936812035170764e-06, + "loss": 0.2493, + "step": 20529, + "teacher_loss": 0.2547980546951294 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.859743058681488, + "learning_rate": 7.93480842484296e-06, + "loss": 0.255, + "step": 20530, + "teacher_loss": 0.18780821561813354 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.5746020078659058, + "learning_rate": 7.932804976498817e-06, + "loss": 0.4405, + "step": 20531, + "teacher_loss": 0.42557328939437866 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.359576940536499, + "learning_rate": 7.930801690184265e-06, + "loss": 0.2297, + "step": 20532, + "teacher_loss": 0.21526750922203064 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.44806063175201416, + "learning_rate": 7.928798565945232e-06, + "loss": 0.2501, + "step": 20533, + "teacher_loss": 0.22809848189353943 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.6142005324363708, + "learning_rate": 7.926795603827655e-06, + "loss": 0.2143, + "step": 20534, + "teacher_loss": 0.16982506215572357 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.4922184944152832, + "learning_rate": 7.92479280387744e-06, + "loss": 0.2158, + "step": 20535, + "teacher_loss": 0.18510878086090088 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.41362863779067993, + "learning_rate": 7.922790166140516e-06, + "loss": 0.3335, + "step": 20536, + "teacher_loss": 0.3245932459831238 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.5203946828842163, + "learning_rate": 7.9207876906628e-06, + "loss": 0.1985, + "step": 20537, + "teacher_loss": 0.1627473533153534 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.4432861804962158, + "learning_rate": 7.91878537749019e-06, + "loss": 0.207, + "step": 20538, + "teacher_loss": 0.18079635500907898 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.6002190709114075, + "learning_rate": 7.916783226668602e-06, + "loss": 0.2991, + "step": 20539, + "teacher_loss": 0.2655889689922333 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.16795042157173157, + "learning_rate": 7.914781238243945e-06, + "loss": 0.1573, + "step": 20540, + "teacher_loss": 0.15615800023078918 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.4106191098690033, + "learning_rate": 7.912779412262105e-06, + "loss": 0.1852, + "step": 20541, + "teacher_loss": 0.16012956202030182 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.4219675064086914, + "learning_rate": 7.910777748768986e-06, + "loss": 0.2838, + "step": 20542, + "teacher_loss": 0.2684195041656494 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.5335960388183594, + "learning_rate": 7.908776247810482e-06, + "loss": 0.2045, + "step": 20543, + "teacher_loss": 0.16792264580726624 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.3978830575942993, + "learning_rate": 7.90677490943248e-06, + "loss": 0.2129, + "step": 20544, + "teacher_loss": 0.19235210120677948 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.5176531076431274, + "learning_rate": 7.904773733680847e-06, + "loss": 0.2869, + "step": 20545, + "teacher_loss": 0.261284202337265 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.2776418924331665, + "learning_rate": 7.902772720601498e-06, + "loss": 0.1778, + "step": 20546, + "teacher_loss": 0.1666685789823532 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.6594192981719971, + "learning_rate": 7.900771870240286e-06, + "loss": 0.2932, + "step": 20547, + "teacher_loss": 0.2525408864021301 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.3557298481464386, + "learning_rate": 7.898771182643087e-06, + "loss": 0.1839, + "step": 20548, + "teacher_loss": 0.16476333141326904 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.2247595489025116, + "learning_rate": 7.896770657855774e-06, + "loss": 0.2201, + "step": 20549, + "teacher_loss": 0.2195352166891098 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.7136620283126831, + "learning_rate": 7.89477029592422e-06, + "loss": 0.3431, + "step": 20550, + "teacher_loss": 0.3018948435783386 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.48758912086486816, + "learning_rate": 7.892770096894274e-06, + "loss": 0.2377, + "step": 20551, + "teacher_loss": 0.2099277526140213 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.6213744878768921, + "learning_rate": 7.8907700608118e-06, + "loss": 0.3432, + "step": 20552, + "teacher_loss": 0.3123405873775482 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.49746182560920715, + "learning_rate": 7.888770187722663e-06, + "loss": 0.2357, + "step": 20553, + "teacher_loss": 0.20666344463825226 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.34353911876678467, + "learning_rate": 7.886770477672703e-06, + "loss": 0.2366, + "step": 20554, + "teacher_loss": 0.22466708719730377 + }, + { + "compression_loss": 0.0, + "epoch": 3.71, + "label_loss": 0.4736207127571106, + "learning_rate": 7.884770930707757e-06, + "loss": 0.2277, + "step": 20555, + "teacher_loss": 0.20042431354522705 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.40838760137557983, + "learning_rate": 7.88277154687369e-06, + "loss": 0.2281, + "step": 20556, + "teacher_loss": 0.20807047188282013 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.5997644066810608, + "learning_rate": 7.88077232621634e-06, + "loss": 0.2878, + "step": 20557, + "teacher_loss": 0.253089040517807 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.47178590297698975, + "learning_rate": 7.878773268781526e-06, + "loss": 0.1858, + "step": 20558, + "teacher_loss": 0.15402144193649292 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.4570574164390564, + "learning_rate": 7.87677437461509e-06, + "loss": 0.2323, + "step": 20559, + "teacher_loss": 0.20727333426475525 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.551131546497345, + "learning_rate": 7.874775643762868e-06, + "loss": 0.2804, + "step": 20560, + "teacher_loss": 0.25032323598861694 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.33984100818634033, + "learning_rate": 7.872777076270669e-06, + "loss": 0.2632, + "step": 20561, + "teacher_loss": 0.25465041399002075 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.5567530393600464, + "learning_rate": 7.870778672184328e-06, + "loss": 0.2427, + "step": 20562, + "teacher_loss": 0.20775152742862701 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.35740435123443604, + "learning_rate": 7.86878043154966e-06, + "loss": 0.2136, + "step": 20563, + "teacher_loss": 0.19762060046195984 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.40884658694267273, + "learning_rate": 7.866782354412471e-06, + "loss": 0.3155, + "step": 20564, + "teacher_loss": 0.305100679397583 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.268097460269928, + "learning_rate": 7.864784440818578e-06, + "loss": 0.2241, + "step": 20565, + "teacher_loss": 0.2192152738571167 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.4740188717842102, + "learning_rate": 7.862786690813789e-06, + "loss": 0.2478, + "step": 20566, + "teacher_loss": 0.22263957560062408 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.43564820289611816, + "learning_rate": 7.860789104443897e-06, + "loss": 0.3101, + "step": 20567, + "teacher_loss": 0.2961238622665405 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 1.0727430582046509, + "learning_rate": 7.858791681754707e-06, + "loss": 0.4854, + "step": 20568, + "teacher_loss": 0.42014801502227783 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.6135869026184082, + "learning_rate": 7.85679442279202e-06, + "loss": 0.323, + "step": 20569, + "teacher_loss": 0.290690541267395 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.3265843391418457, + "learning_rate": 7.854797327601614e-06, + "loss": 0.2562, + "step": 20570, + "teacher_loss": 0.2483462244272232 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.5780649185180664, + "learning_rate": 7.852800396229286e-06, + "loss": 0.2737, + "step": 20571, + "teacher_loss": 0.23989325761795044 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.40878117084503174, + "learning_rate": 7.850803628720814e-06, + "loss": 0.3163, + "step": 20572, + "teacher_loss": 0.3060475289821625 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.388255774974823, + "learning_rate": 7.848807025121985e-06, + "loss": 0.2648, + "step": 20573, + "teacher_loss": 0.2510680556297302 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.3706388473510742, + "learning_rate": 7.846810585478565e-06, + "loss": 0.1865, + "step": 20574, + "teacher_loss": 0.1660592257976532 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.4351791739463806, + "learning_rate": 7.844814309836334e-06, + "loss": 0.275, + "step": 20575, + "teacher_loss": 0.2572292685508728 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.23523324728012085, + "learning_rate": 7.84281819824106e-06, + "loss": 0.1884, + "step": 20576, + "teacher_loss": 0.18315139412879944 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.6651009321212769, + "learning_rate": 7.840822250738503e-06, + "loss": 0.3068, + "step": 20577, + "teacher_loss": 0.26703932881355286 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.5947476625442505, + "learning_rate": 7.838826467374426e-06, + "loss": 0.2314, + "step": 20578, + "teacher_loss": 0.1910579800605774 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.3304585814476013, + "learning_rate": 7.836830848194597e-06, + "loss": 0.2398, + "step": 20579, + "teacher_loss": 0.2297612428665161 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.6935474872589111, + "learning_rate": 7.83483539324475e-06, + "loss": 0.2373, + "step": 20580, + "teacher_loss": 0.18655851483345032 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.7470519542694092, + "learning_rate": 7.832840102570655e-06, + "loss": 0.3217, + "step": 20581, + "teacher_loss": 0.2744293510913849 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.45566248893737793, + "learning_rate": 7.83084497621804e-06, + "loss": 0.2921, + "step": 20582, + "teacher_loss": 0.2738826274871826 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.23319372534751892, + "learning_rate": 7.828850014232656e-06, + "loss": 0.1893, + "step": 20583, + "teacher_loss": 0.18445329368114471 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.6765554547309875, + "learning_rate": 7.826855216660247e-06, + "loss": 0.3197, + "step": 20584, + "teacher_loss": 0.2800275683403015 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.2504131495952606, + "learning_rate": 7.824860583546536e-06, + "loss": 0.1816, + "step": 20585, + "teacher_loss": 0.1739581823348999 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.5527567863464355, + "learning_rate": 7.82286611493726e-06, + "loss": 0.2781, + "step": 20586, + "teacher_loss": 0.24757763743400574 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.32621315121650696, + "learning_rate": 7.820871810878151e-06, + "loss": 0.2305, + "step": 20587, + "teacher_loss": 0.21982556581497192 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.3054104745388031, + "learning_rate": 7.81887767141492e-06, + "loss": 0.2513, + "step": 20588, + "teacher_loss": 0.24533875286579132 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.4608084559440613, + "learning_rate": 7.816883696593297e-06, + "loss": 0.3438, + "step": 20589, + "teacher_loss": 0.3308277726173401 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.7486361265182495, + "learning_rate": 7.814889886458999e-06, + "loss": 0.5455, + "step": 20590, + "teacher_loss": 0.5228923559188843 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.2961157560348511, + "learning_rate": 7.812896241057728e-06, + "loss": 0.2293, + "step": 20591, + "teacher_loss": 0.22187002003192902 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.46913662552833557, + "learning_rate": 7.810902760435198e-06, + "loss": 0.2862, + "step": 20592, + "teacher_loss": 0.26582786440849304 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.5202335715293884, + "learning_rate": 7.808909444637119e-06, + "loss": 0.2352, + "step": 20593, + "teacher_loss": 0.20353004336357117 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.25242379307746887, + "learning_rate": 7.806916293709188e-06, + "loss": 0.3006, + "step": 20594, + "teacher_loss": 0.3059537410736084 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.36521148681640625, + "learning_rate": 7.804923307697092e-06, + "loss": 0.2378, + "step": 20595, + "teacher_loss": 0.22366078197956085 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.24222902953624725, + "learning_rate": 7.802930486646534e-06, + "loss": 0.1539, + "step": 20596, + "teacher_loss": 0.14406165480613708 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.2768211364746094, + "learning_rate": 7.800937830603208e-06, + "loss": 0.1774, + "step": 20597, + "teacher_loss": 0.16636043787002563 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.4194197356700897, + "learning_rate": 7.798945339612787e-06, + "loss": 0.2837, + "step": 20598, + "teacher_loss": 0.2685825824737549 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.48154306411743164, + "learning_rate": 7.796953013720961e-06, + "loss": 0.2673, + "step": 20599, + "teacher_loss": 0.24351122975349426 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.439342200756073, + "learning_rate": 7.794960852973413e-06, + "loss": 0.2898, + "step": 20600, + "teacher_loss": 0.27313506603240967 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.39674484729766846, + "learning_rate": 7.792968857415803e-06, + "loss": 0.1639, + "step": 20601, + "teacher_loss": 0.13806486129760742 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.5691717267036438, + "learning_rate": 7.790977027093808e-06, + "loss": 0.3161, + "step": 20602, + "teacher_loss": 0.28793853521347046 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.21447071433067322, + "learning_rate": 7.788985362053105e-06, + "loss": 0.2644, + "step": 20603, + "teacher_loss": 0.26990869641304016 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.7089633941650391, + "learning_rate": 7.786993862339347e-06, + "loss": 0.2653, + "step": 20604, + "teacher_loss": 0.21601277589797974 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.8200685977935791, + "learning_rate": 7.785002527998182e-06, + "loss": 0.3423, + "step": 20605, + "teacher_loss": 0.2891594469547272 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.2618734538555145, + "learning_rate": 7.78301135907529e-06, + "loss": 0.2311, + "step": 20606, + "teacher_loss": 0.2276328057050705 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.7955800294876099, + "learning_rate": 7.781020355616309e-06, + "loss": 0.3536, + "step": 20607, + "teacher_loss": 0.30448412895202637 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.612231969833374, + "learning_rate": 7.779029517666873e-06, + "loss": 0.2833, + "step": 20608, + "teacher_loss": 0.2466975748538971 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.48443174362182617, + "learning_rate": 7.777038845272656e-06, + "loss": 0.4876, + "step": 20609, + "teacher_loss": 0.48799023032188416 + }, + { + "compression_loss": 0.0, + "epoch": 3.72, + "label_loss": 0.3143419027328491, + "learning_rate": 7.775048338479282e-06, + "loss": 0.2213, + "step": 20610, + "teacher_loss": 0.2109655737876892 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.8374941945075989, + "learning_rate": 7.773057997332384e-06, + "loss": 0.2836, + "step": 20611, + "teacher_loss": 0.22203822433948517 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.8722686767578125, + "learning_rate": 7.771067821877594e-06, + "loss": 0.2804, + "step": 20612, + "teacher_loss": 0.214582160115242 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.4468388259410858, + "learning_rate": 7.769077812160555e-06, + "loss": 0.2787, + "step": 20613, + "teacher_loss": 0.2599703073501587 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.9476112127304077, + "learning_rate": 7.767087968226875e-06, + "loss": 0.2766, + "step": 20614, + "teacher_loss": 0.20204895734786987 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.34413790702819824, + "learning_rate": 7.765098290122182e-06, + "loss": 0.1868, + "step": 20615, + "teacher_loss": 0.1693127453327179 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.34676483273506165, + "learning_rate": 7.763108777892101e-06, + "loss": 0.3059, + "step": 20616, + "teacher_loss": 0.3013885021209717 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.2990168631076813, + "learning_rate": 7.76111943158224e-06, + "loss": 0.1938, + "step": 20617, + "teacher_loss": 0.1820741593837738 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.3342002332210541, + "learning_rate": 7.759130251238194e-06, + "loss": 0.2255, + "step": 20618, + "teacher_loss": 0.21344928443431854 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.25176236033439636, + "learning_rate": 7.75714123690559e-06, + "loss": 0.2296, + "step": 20619, + "teacher_loss": 0.22711308300495148 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.46062982082366943, + "learning_rate": 7.755152388630026e-06, + "loss": 0.2612, + "step": 20620, + "teacher_loss": 0.23902156949043274 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.5796553492546082, + "learning_rate": 7.75316370645709e-06, + "loss": 0.2774, + "step": 20621, + "teacher_loss": 0.24383430182933807 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.39046692848205566, + "learning_rate": 7.751175190432383e-06, + "loss": 0.2098, + "step": 20622, + "teacher_loss": 0.18975865840911865 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.695408821105957, + "learning_rate": 7.749186840601503e-06, + "loss": 0.1808, + "step": 20623, + "teacher_loss": 0.12367603927850723 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.2626107931137085, + "learning_rate": 7.747198657010022e-06, + "loss": 0.1788, + "step": 20624, + "teacher_loss": 0.16947954893112183 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.6986124515533447, + "learning_rate": 7.745210639703533e-06, + "loss": 0.2719, + "step": 20625, + "teacher_loss": 0.2245016098022461 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.8883216977119446, + "learning_rate": 7.743222788727616e-06, + "loss": 0.3389, + "step": 20626, + "teacher_loss": 0.2778290808200836 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.6360207796096802, + "learning_rate": 7.74123510412784e-06, + "loss": 0.2562, + "step": 20627, + "teacher_loss": 0.21398407220840454 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.2795759439468384, + "learning_rate": 7.739247585949779e-06, + "loss": 0.2054, + "step": 20628, + "teacher_loss": 0.19721169769763947 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.6199157238006592, + "learning_rate": 7.737260234239006e-06, + "loss": 0.2707, + "step": 20629, + "teacher_loss": 0.23194241523742676 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.31909239292144775, + "learning_rate": 7.735273049041078e-06, + "loss": 0.1741, + "step": 20630, + "teacher_loss": 0.15800046920776367 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.6406176090240479, + "learning_rate": 7.733286030401564e-06, + "loss": 0.2035, + "step": 20631, + "teacher_loss": 0.1549123078584671 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.2993151545524597, + "learning_rate": 7.731299178366008e-06, + "loss": 0.1449, + "step": 20632, + "teacher_loss": 0.1277957260608673 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.4320480227470398, + "learning_rate": 7.729312492979967e-06, + "loss": 0.1793, + "step": 20633, + "teacher_loss": 0.1511736512184143 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.4288039207458496, + "learning_rate": 7.727325974289e-06, + "loss": 0.2501, + "step": 20634, + "teacher_loss": 0.23027953505516052 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.3129156827926636, + "learning_rate": 7.725339622338639e-06, + "loss": 0.2408, + "step": 20635, + "teacher_loss": 0.2328205406665802 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.2605130672454834, + "learning_rate": 7.723353437174435e-06, + "loss": 0.1698, + "step": 20636, + "teacher_loss": 0.1597018539905548 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.446674108505249, + "learning_rate": 7.721367418841914e-06, + "loss": 0.2766, + "step": 20637, + "teacher_loss": 0.257689893245697 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.6010599136352539, + "learning_rate": 7.719381567386615e-06, + "loss": 0.3327, + "step": 20638, + "teacher_loss": 0.3028751611709595 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.352644681930542, + "learning_rate": 7.717395882854075e-06, + "loss": 0.2538, + "step": 20639, + "teacher_loss": 0.2428404688835144 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.6668270826339722, + "learning_rate": 7.715410365289806e-06, + "loss": 0.2157, + "step": 20640, + "teacher_loss": 0.1655881702899933 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.32208502292633057, + "learning_rate": 7.713425014739337e-06, + "loss": 0.2557, + "step": 20641, + "teacher_loss": 0.248269185423851 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.2823813557624817, + "learning_rate": 7.711439831248192e-06, + "loss": 0.1525, + "step": 20642, + "teacher_loss": 0.13801613450050354 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.4927947223186493, + "learning_rate": 7.709454814861876e-06, + "loss": 0.3049, + "step": 20643, + "teacher_loss": 0.28396880626678467 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.552959680557251, + "learning_rate": 7.707469965625907e-06, + "loss": 0.3861, + "step": 20644, + "teacher_loss": 0.3675660789012909 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.14102678000926971, + "learning_rate": 7.70548528358578e-06, + "loss": 0.17, + "step": 20645, + "teacher_loss": 0.17317049205303192 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.5988131761550903, + "learning_rate": 7.703500768787008e-06, + "loss": 0.2357, + "step": 20646, + "teacher_loss": 0.19538281857967377 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.503410816192627, + "learning_rate": 7.701516421275092e-06, + "loss": 0.2283, + "step": 20647, + "teacher_loss": 0.197728231549263 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.2142864167690277, + "learning_rate": 7.699532241095518e-06, + "loss": 0.2193, + "step": 20648, + "teacher_loss": 0.2198556363582611 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.44591790437698364, + "learning_rate": 7.69754822829378e-06, + "loss": 0.2409, + "step": 20649, + "teacher_loss": 0.21816131472587585 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.40563446283340454, + "learning_rate": 7.695564382915374e-06, + "loss": 0.1998, + "step": 20650, + "teacher_loss": 0.17687420547008514 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.28687727451324463, + "learning_rate": 7.69358070500577e-06, + "loss": 0.2426, + "step": 20651, + "teacher_loss": 0.23770327866077423 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.626595139503479, + "learning_rate": 7.691597194610456e-06, + "loss": 0.232, + "step": 20652, + "teacher_loss": 0.18810918927192688 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.3314914107322693, + "learning_rate": 7.689613851774911e-06, + "loss": 0.1745, + "step": 20653, + "teacher_loss": 0.15703517198562622 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.4683282971382141, + "learning_rate": 7.687630676544605e-06, + "loss": 0.2813, + "step": 20654, + "teacher_loss": 0.2605719268321991 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.9122825860977173, + "learning_rate": 7.685647668964988e-06, + "loss": 0.2831, + "step": 20655, + "teacher_loss": 0.21318268775939941 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.4241066873073578, + "learning_rate": 7.683664829081557e-06, + "loss": 0.2378, + "step": 20656, + "teacher_loss": 0.2171216458082199 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 1.0855133533477783, + "learning_rate": 7.681682156939752e-06, + "loss": 0.3129, + "step": 20657, + "teacher_loss": 0.22705848515033722 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.2217554748058319, + "learning_rate": 7.67969965258503e-06, + "loss": 0.1764, + "step": 20658, + "teacher_loss": 0.1713918000459671 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.3536851406097412, + "learning_rate": 7.677717316062849e-06, + "loss": 0.2638, + "step": 20659, + "teacher_loss": 0.2537705898284912 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.27580326795578003, + "learning_rate": 7.67573514741866e-06, + "loss": 0.2091, + "step": 20660, + "teacher_loss": 0.20165014266967773 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.5255050659179688, + "learning_rate": 7.6737531466979e-06, + "loss": 0.2815, + "step": 20661, + "teacher_loss": 0.25439292192459106 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.46773040294647217, + "learning_rate": 7.671771313946015e-06, + "loss": 0.223, + "step": 20662, + "teacher_loss": 0.1957968771457672 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.32312217354774475, + "learning_rate": 7.669789649208449e-06, + "loss": 0.258, + "step": 20663, + "teacher_loss": 0.25072628259658813 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.3443107604980469, + "learning_rate": 7.667808152530622e-06, + "loss": 0.1874, + "step": 20664, + "teacher_loss": 0.16992640495300293 + }, + { + "compression_loss": 0.0, + "epoch": 3.73, + "label_loss": 0.618716835975647, + "learning_rate": 7.66582682395797e-06, + "loss": 0.2242, + "step": 20665, + "teacher_loss": 0.180341899394989 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.3502388894557953, + "learning_rate": 7.663845663535928e-06, + "loss": 0.1764, + "step": 20666, + "teacher_loss": 0.15708932280540466 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.4204500913619995, + "learning_rate": 7.661864671309908e-06, + "loss": 0.246, + "step": 20667, + "teacher_loss": 0.22665318846702576 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.436565101146698, + "learning_rate": 7.65988384732532e-06, + "loss": 0.2139, + "step": 20668, + "teacher_loss": 0.18915501236915588 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.3638366460800171, + "learning_rate": 7.6579031916276e-06, + "loss": 0.2104, + "step": 20669, + "teacher_loss": 0.19330652058124542 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.5326085686683655, + "learning_rate": 7.65592270426215e-06, + "loss": 0.1972, + "step": 20670, + "teacher_loss": 0.15993425250053406 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.35313689708709717, + "learning_rate": 7.653942385274362e-06, + "loss": 0.2106, + "step": 20671, + "teacher_loss": 0.19481004774570465 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.48107102513313293, + "learning_rate": 7.651962234709655e-06, + "loss": 0.4124, + "step": 20672, + "teacher_loss": 0.404815673828125 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.6386967897415161, + "learning_rate": 7.649982252613428e-06, + "loss": 0.2845, + "step": 20673, + "teacher_loss": 0.24518311023712158 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.4580540060997009, + "learning_rate": 7.648002439031065e-06, + "loss": 0.2202, + "step": 20674, + "teacher_loss": 0.19380971789360046 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.6248688101768494, + "learning_rate": 7.646022794007966e-06, + "loss": 0.359, + "step": 20675, + "teacher_loss": 0.3294230103492737 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.40935218334198, + "learning_rate": 7.64404331758952e-06, + "loss": 0.1811, + "step": 20676, + "teacher_loss": 0.15575991570949554 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.4022122919559479, + "learning_rate": 7.642064009821106e-06, + "loss": 0.2564, + "step": 20677, + "teacher_loss": 0.24018919467926025 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.34272414445877075, + "learning_rate": 7.64008487074809e-06, + "loss": 0.1919, + "step": 20678, + "teacher_loss": 0.17516404390335083 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.36190497875213623, + "learning_rate": 7.638105900415878e-06, + "loss": 0.232, + "step": 20679, + "teacher_loss": 0.21755488216876984 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.4359971284866333, + "learning_rate": 7.636127098869824e-06, + "loss": 0.2478, + "step": 20680, + "teacher_loss": 0.22691220045089722 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.42522379755973816, + "learning_rate": 7.634148466155292e-06, + "loss": 0.1884, + "step": 20681, + "teacher_loss": 0.16203323006629944 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.31441712379455566, + "learning_rate": 7.632170002317649e-06, + "loss": 0.2581, + "step": 20682, + "teacher_loss": 0.2518788278102875 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.4209899306297302, + "learning_rate": 7.630191707402267e-06, + "loss": 0.2544, + "step": 20683, + "teacher_loss": 0.23583492636680603 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.44638535380363464, + "learning_rate": 7.628213581454485e-06, + "loss": 0.3165, + "step": 20684, + "teacher_loss": 0.30206233263015747 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.3177550733089447, + "learning_rate": 7.626235624519665e-06, + "loss": 0.2349, + "step": 20685, + "teacher_loss": 0.22569599747657776 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.38468509912490845, + "learning_rate": 7.624257836643162e-06, + "loss": 0.213, + "step": 20686, + "teacher_loss": 0.19397558271884918 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.41876256465911865, + "learning_rate": 7.6222802178703064e-06, + "loss": 0.2505, + "step": 20687, + "teacher_loss": 0.23175761103630066 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.2677580416202545, + "learning_rate": 7.620302768246445e-06, + "loss": 0.183, + "step": 20688, + "teacher_loss": 0.1735418140888214 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.3936900794506073, + "learning_rate": 7.618325487816921e-06, + "loss": 0.2039, + "step": 20689, + "teacher_loss": 0.18284907937049866 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.49180394411087036, + "learning_rate": 7.616348376627057e-06, + "loss": 0.2276, + "step": 20690, + "teacher_loss": 0.19824816286563873 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.7609177231788635, + "learning_rate": 7.614371434722187e-06, + "loss": 0.3484, + "step": 20691, + "teacher_loss": 0.30254560708999634 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.3338005542755127, + "learning_rate": 7.612394662147643e-06, + "loss": 0.2378, + "step": 20692, + "teacher_loss": 0.2271246314048767 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.6402816772460938, + "learning_rate": 7.6104180589487354e-06, + "loss": 0.325, + "step": 20693, + "teacher_loss": 0.28994888067245483 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.45723778009414673, + "learning_rate": 7.608441625170791e-06, + "loss": 0.2839, + "step": 20694, + "teacher_loss": 0.2646586298942566 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.38231393694877625, + "learning_rate": 7.606465360859113e-06, + "loss": 0.2183, + "step": 20695, + "teacher_loss": 0.20008337497711182 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.7269190549850464, + "learning_rate": 7.604489266059019e-06, + "loss": 0.4847, + "step": 20696, + "teacher_loss": 0.45775866508483887 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.36590510606765747, + "learning_rate": 7.602513340815817e-06, + "loss": 0.2195, + "step": 20697, + "teacher_loss": 0.2031954973936081 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.3498404324054718, + "learning_rate": 7.600537585174802e-06, + "loss": 0.1947, + "step": 20698, + "teacher_loss": 0.1774539351463318 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.3963054418563843, + "learning_rate": 7.598561999181278e-06, + "loss": 0.2276, + "step": 20699, + "teacher_loss": 0.20884516835212708 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.43781328201293945, + "learning_rate": 7.596586582880535e-06, + "loss": 0.287, + "step": 20700, + "teacher_loss": 0.2702672779560089 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.3699612617492676, + "learning_rate": 7.5946113363178615e-06, + "loss": 0.1846, + "step": 20701, + "teacher_loss": 0.16404679417610168 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.29414916038513184, + "learning_rate": 7.592636259538556e-06, + "loss": 0.2136, + "step": 20702, + "teacher_loss": 0.20467005670070648 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.19703754782676697, + "learning_rate": 7.5906613525878845e-06, + "loss": 0.1851, + "step": 20703, + "teacher_loss": 0.18382486701011658 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.3419671952724457, + "learning_rate": 7.588686615511141e-06, + "loss": 0.295, + "step": 20704, + "teacher_loss": 0.28976479172706604 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.5558841228485107, + "learning_rate": 7.5867120483535865e-06, + "loss": 0.2365, + "step": 20705, + "teacher_loss": 0.20102283358573914 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.2001648098230362, + "learning_rate": 7.584737651160498e-06, + "loss": 0.1912, + "step": 20706, + "teacher_loss": 0.19014906883239746 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.2834233343601227, + "learning_rate": 7.5827634239771495e-06, + "loss": 0.1782, + "step": 20707, + "teacher_loss": 0.16655324399471283 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 1.2182042598724365, + "learning_rate": 7.580789366848794e-06, + "loss": 0.2955, + "step": 20708, + "teacher_loss": 0.19293151795864105 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.8388482332229614, + "learning_rate": 7.578815479820692e-06, + "loss": 0.2043, + "step": 20709, + "teacher_loss": 0.13382437825202942 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.15370246767997742, + "learning_rate": 7.576841762938108e-06, + "loss": 0.1682, + "step": 20710, + "teacher_loss": 0.16980718076229095 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.7117568254470825, + "learning_rate": 7.574868216246279e-06, + "loss": 0.244, + "step": 20711, + "teacher_loss": 0.19198210537433624 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.17348745465278625, + "learning_rate": 7.572894839790462e-06, + "loss": 0.1884, + "step": 20712, + "teacher_loss": 0.19007861614227295 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.9110016822814941, + "learning_rate": 7.570921633615904e-06, + "loss": 0.3511, + "step": 20713, + "teacher_loss": 0.2889332175254822 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.6120614409446716, + "learning_rate": 7.568948597767833e-06, + "loss": 0.2009, + "step": 20714, + "teacher_loss": 0.1552184671163559 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.2810188829898834, + "learning_rate": 7.566975732291491e-06, + "loss": 0.2074, + "step": 20715, + "teacher_loss": 0.19925513863563538 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.49374228715896606, + "learning_rate": 7.565003037232119e-06, + "loss": 0.2664, + "step": 20716, + "teacher_loss": 0.24116943776607513 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.25371938943862915, + "learning_rate": 7.563030512634932e-06, + "loss": 0.2118, + "step": 20717, + "teacher_loss": 0.20717597007751465 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.7613872289657593, + "learning_rate": 7.561058158545153e-06, + "loss": 0.2676, + "step": 20718, + "teacher_loss": 0.21278022229671478 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.14412908256053925, + "learning_rate": 7.55908597500801e-06, + "loss": 0.1399, + "step": 20719, + "teacher_loss": 0.13938085734844208 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.38786977529525757, + "learning_rate": 7.557113962068721e-06, + "loss": 0.3816, + "step": 20720, + "teacher_loss": 0.38089269399642944 + }, + { + "compression_loss": 0.0, + "epoch": 3.74, + "label_loss": 0.26191383600234985, + "learning_rate": 7.555142119772488e-06, + "loss": 0.1478, + "step": 20721, + "teacher_loss": 0.13510656356811523 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.4136655330657959, + "learning_rate": 7.553170448164524e-06, + "loss": 0.276, + "step": 20722, + "teacher_loss": 0.260692298412323 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.25625112652778625, + "learning_rate": 7.551198947290043e-06, + "loss": 0.1773, + "step": 20723, + "teacher_loss": 0.16848014295101166 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.23818835616111755, + "learning_rate": 7.549227617194231e-06, + "loss": 0.1623, + "step": 20724, + "teacher_loss": 0.15389969944953918 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.40371131896972656, + "learning_rate": 7.547256457922291e-06, + "loss": 0.2064, + "step": 20725, + "teacher_loss": 0.18444772064685822 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.3765753507614136, + "learning_rate": 7.54528546951942e-06, + "loss": 0.2419, + "step": 20726, + "teacher_loss": 0.22689582407474518 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.5859529376029968, + "learning_rate": 7.5433146520308045e-06, + "loss": 0.2196, + "step": 20727, + "teacher_loss": 0.17893315851688385 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.244625985622406, + "learning_rate": 7.541344005501617e-06, + "loss": 0.1744, + "step": 20728, + "teacher_loss": 0.16655270755290985 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.2897653579711914, + "learning_rate": 7.539373529977059e-06, + "loss": 0.142, + "step": 20729, + "teacher_loss": 0.12561453878879547 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.4354771673679352, + "learning_rate": 7.5374032255022975e-06, + "loss": 0.2498, + "step": 20730, + "teacher_loss": 0.2292238026857376 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.3942147493362427, + "learning_rate": 7.535433092122496e-06, + "loss": 0.2816, + "step": 20731, + "teacher_loss": 0.26903682947158813 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.244733527302742, + "learning_rate": 7.533463129882844e-06, + "loss": 0.2135, + "step": 20732, + "teacher_loss": 0.21001559495925903 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.4936160147190094, + "learning_rate": 7.531493338828499e-06, + "loss": 0.3877, + "step": 20733, + "teacher_loss": 0.3759553134441376 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.6860644817352295, + "learning_rate": 7.529523719004612e-06, + "loss": 0.3148, + "step": 20734, + "teacher_loss": 0.2735133171081543 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.624211311340332, + "learning_rate": 7.527554270456349e-06, + "loss": 0.272, + "step": 20735, + "teacher_loss": 0.23286965489387512 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.4572480320930481, + "learning_rate": 7.525584993228869e-06, + "loss": 0.1802, + "step": 20736, + "teacher_loss": 0.14945730566978455 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.32000505924224854, + "learning_rate": 7.52361588736731e-06, + "loss": 0.1868, + "step": 20737, + "teacher_loss": 0.17198756337165833 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.5569369792938232, + "learning_rate": 7.521646952916823e-06, + "loss": 0.2913, + "step": 20738, + "teacher_loss": 0.26179802417755127 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.36998504400253296, + "learning_rate": 7.519678189922555e-06, + "loss": 0.2243, + "step": 20739, + "teacher_loss": 0.20809680223464966 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.4210659861564636, + "learning_rate": 7.517709598429641e-06, + "loss": 0.2203, + "step": 20740, + "teacher_loss": 0.19799906015396118 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.2743741273880005, + "learning_rate": 7.5157411784832e-06, + "loss": 0.1695, + "step": 20741, + "teacher_loss": 0.15788379311561584 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.49082091450691223, + "learning_rate": 7.513772930128386e-06, + "loss": 0.2051, + "step": 20742, + "teacher_loss": 0.1733470857143402 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.6030537486076355, + "learning_rate": 7.511804853410314e-06, + "loss": 0.281, + "step": 20743, + "teacher_loss": 0.24524493515491486 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.4425712823867798, + "learning_rate": 7.509836948374102e-06, + "loss": 0.2721, + "step": 20744, + "teacher_loss": 0.25315406918525696 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.43324679136276245, + "learning_rate": 7.507869215064871e-06, + "loss": 0.1806, + "step": 20745, + "teacher_loss": 0.1525239795446396 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.20522859692573547, + "learning_rate": 7.50590165352774e-06, + "loss": 0.2204, + "step": 20746, + "teacher_loss": 0.22207316756248474 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.13888667523860931, + "learning_rate": 7.503934263807813e-06, + "loss": 0.1978, + "step": 20747, + "teacher_loss": 0.20436108112335205 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.25033554434776306, + "learning_rate": 7.5019670459501974e-06, + "loss": 0.1598, + "step": 20748, + "teacher_loss": 0.14969266951084137 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.2894870638847351, + "learning_rate": 7.500000000000004e-06, + "loss": 0.2212, + "step": 20749, + "teacher_loss": 0.2136184275150299 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 1.0707168579101562, + "learning_rate": 7.498033126002317e-06, + "loss": 0.335, + "step": 20750, + "teacher_loss": 0.2532368302345276 + }, + { + "epoch": 3.75, + "eval_exact_match": 80.40681173131505, + "eval_f1": 87.72404554738125, + "step": 20750 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.20284032821655273, + "learning_rate": 7.496066424002239e-06, + "loss": 0.1546, + "step": 20751, + "teacher_loss": 0.1491994708776474 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.26079559326171875, + "learning_rate": 7.4940998940448654e-06, + "loss": 0.1975, + "step": 20752, + "teacher_loss": 0.19049356877803802 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.2134438455104828, + "learning_rate": 7.492133536175272e-06, + "loss": 0.2185, + "step": 20753, + "teacher_loss": 0.2190854847431183 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.6453027725219727, + "learning_rate": 7.490167350438553e-06, + "loss": 0.2175, + "step": 20754, + "teacher_loss": 0.16995762288570404 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.6194392442703247, + "learning_rate": 7.4882013368797745e-06, + "loss": 0.2662, + "step": 20755, + "teacher_loss": 0.22695858776569366 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.3406556248664856, + "learning_rate": 7.486235495544019e-06, + "loss": 0.2577, + "step": 20756, + "teacher_loss": 0.2484908401966095 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.601486086845398, + "learning_rate": 7.484269826476361e-06, + "loss": 0.2502, + "step": 20757, + "teacher_loss": 0.21122130751609802 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.42010968923568726, + "learning_rate": 7.482304329721857e-06, + "loss": 0.1927, + "step": 20758, + "teacher_loss": 0.167444109916687 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.5542106628417969, + "learning_rate": 7.480339005325576e-06, + "loss": 0.341, + "step": 20759, + "teacher_loss": 0.3173280358314514 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.6838760375976562, + "learning_rate": 7.478373853332581e-06, + "loss": 0.3701, + "step": 20760, + "teacher_loss": 0.33525902032852173 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.6941088438034058, + "learning_rate": 7.476408873787918e-06, + "loss": 0.2933, + "step": 20761, + "teacher_loss": 0.2487148493528366 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.1887531280517578, + "learning_rate": 7.474444066736647e-06, + "loss": 0.1747, + "step": 20762, + "teacher_loss": 0.17308753728866577 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.392214298248291, + "learning_rate": 7.472479432223806e-06, + "loss": 0.24, + "step": 20763, + "teacher_loss": 0.22308039665222168 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.2423935979604721, + "learning_rate": 7.470514970294443e-06, + "loss": 0.1707, + "step": 20764, + "teacher_loss": 0.1626831591129303 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.10280925035476685, + "learning_rate": 7.468550680993604e-06, + "loss": 0.1331, + "step": 20765, + "teacher_loss": 0.13645391166210175 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.3067972958087921, + "learning_rate": 7.466586564366309e-06, + "loss": 0.1922, + "step": 20766, + "teacher_loss": 0.1795041859149933 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.9471680521965027, + "learning_rate": 7.464622620457605e-06, + "loss": 0.2577, + "step": 20767, + "teacher_loss": 0.1810496747493744 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.15797367691993713, + "learning_rate": 7.462658849312507e-06, + "loss": 0.163, + "step": 20768, + "teacher_loss": 0.16355343163013458 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.5739122629165649, + "learning_rate": 7.460695250976042e-06, + "loss": 0.287, + "step": 20769, + "teacher_loss": 0.25512754917144775 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.9545706510543823, + "learning_rate": 7.458731825493237e-06, + "loss": 0.5573, + "step": 20770, + "teacher_loss": 0.5131410360336304 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.2062109261751175, + "learning_rate": 7.456768572909097e-06, + "loss": 0.2129, + "step": 20771, + "teacher_loss": 0.21362704038619995 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.9647200107574463, + "learning_rate": 7.454805493268635e-06, + "loss": 0.5632, + "step": 20772, + "teacher_loss": 0.5185519456863403 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.268298476934433, + "learning_rate": 7.4528425866168705e-06, + "loss": 0.2095, + "step": 20773, + "teacher_loss": 0.2029745727777481 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.969589114189148, + "learning_rate": 7.450879852998791e-06, + "loss": 0.4408, + "step": 20774, + "teacher_loss": 0.3820270299911499 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.4565858542919159, + "learning_rate": 7.4489172924594014e-06, + "loss": 0.2322, + "step": 20775, + "teacher_loss": 0.20724791288375854 + }, + { + "compression_loss": 0.0, + "epoch": 3.75, + "label_loss": 0.6731314659118652, + "learning_rate": 7.446954905043707e-06, + "loss": 0.2596, + "step": 20776, + "teacher_loss": 0.21364635229110718 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.4056048095226288, + "learning_rate": 7.444992690796691e-06, + "loss": 0.2527, + "step": 20777, + "teacher_loss": 0.23567092418670654 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.2888379693031311, + "learning_rate": 7.4430306497633286e-06, + "loss": 0.3097, + "step": 20778, + "teacher_loss": 0.31204405426979065 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.5134663581848145, + "learning_rate": 7.441068781988628e-06, + "loss": 0.3031, + "step": 20779, + "teacher_loss": 0.27977266907691956 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.3733491897583008, + "learning_rate": 7.439107087517559e-06, + "loss": 0.2276, + "step": 20780, + "teacher_loss": 0.21140369772911072 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.23875029385089874, + "learning_rate": 7.437145566395088e-06, + "loss": 0.205, + "step": 20781, + "teacher_loss": 0.2012852132320404 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.46868765354156494, + "learning_rate": 7.435184218666195e-06, + "loss": 0.2676, + "step": 20782, + "teacher_loss": 0.24521180987358093 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.4392552077770233, + "learning_rate": 7.433223044375854e-06, + "loss": 0.2336, + "step": 20783, + "teacher_loss": 0.21076600253582 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.25900956988334656, + "learning_rate": 7.431262043569016e-06, + "loss": 0.2107, + "step": 20784, + "teacher_loss": 0.205328106880188 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.49829140305519104, + "learning_rate": 7.429301216290648e-06, + "loss": 0.2545, + "step": 20785, + "teacher_loss": 0.2273646593093872 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.9398878216743469, + "learning_rate": 7.42734056258571e-06, + "loss": 0.372, + "step": 20786, + "teacher_loss": 0.3088604211807251 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.4491899013519287, + "learning_rate": 7.425380082499144e-06, + "loss": 0.24, + "step": 20787, + "teacher_loss": 0.2167256772518158 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.4651971757411957, + "learning_rate": 7.4234197760759015e-06, + "loss": 0.286, + "step": 20788, + "teacher_loss": 0.26613929867744446 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.7286639213562012, + "learning_rate": 7.421459643360934e-06, + "loss": 0.2367, + "step": 20789, + "teacher_loss": 0.18205049633979797 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.4562970995903015, + "learning_rate": 7.419499684399175e-06, + "loss": 0.2122, + "step": 20790, + "teacher_loss": 0.18506991863250732 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.4934269189834595, + "learning_rate": 7.417539899235549e-06, + "loss": 0.2909, + "step": 20791, + "teacher_loss": 0.26839479804039 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.5747343897819519, + "learning_rate": 7.415580287915012e-06, + "loss": 0.3562, + "step": 20792, + "teacher_loss": 0.3319389522075653 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.3341788053512573, + "learning_rate": 7.413620850482479e-06, + "loss": 0.246, + "step": 20793, + "teacher_loss": 0.23622292280197144 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.3715561330318451, + "learning_rate": 7.411661586982871e-06, + "loss": 0.2777, + "step": 20794, + "teacher_loss": 0.2672439217567444 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.4468238353729248, + "learning_rate": 7.409702497461109e-06, + "loss": 0.2056, + "step": 20795, + "teacher_loss": 0.1787426769733429 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.2073078453540802, + "learning_rate": 7.407743581962119e-06, + "loss": 0.1677, + "step": 20796, + "teacher_loss": 0.1633288860321045 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.6839919090270996, + "learning_rate": 7.4057848405307995e-06, + "loss": 0.3752, + "step": 20797, + "teacher_loss": 0.3409017324447632 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.4029559791088104, + "learning_rate": 7.403826273212066e-06, + "loss": 0.2701, + "step": 20798, + "teacher_loss": 0.2552831172943115 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.2680244445800781, + "learning_rate": 7.401867880050827e-06, + "loss": 0.1918, + "step": 20799, + "teacher_loss": 0.18330876529216766 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.4653906226158142, + "learning_rate": 7.39990966109197e-06, + "loss": 0.3439, + "step": 20800, + "teacher_loss": 0.330363392829895 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.451619029045105, + "learning_rate": 7.397951616380401e-06, + "loss": 0.2554, + "step": 20801, + "teacher_loss": 0.23364028334617615 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.28162825107574463, + "learning_rate": 7.395993745961012e-06, + "loss": 0.1898, + "step": 20802, + "teacher_loss": 0.17959347367286682 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.7520164251327515, + "learning_rate": 7.3940360498786904e-06, + "loss": 0.2888, + "step": 20803, + "teacher_loss": 0.23734937608242035 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.2984544634819031, + "learning_rate": 7.392078528178312e-06, + "loss": 0.1921, + "step": 20804, + "teacher_loss": 0.1802406907081604 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.25831353664398193, + "learning_rate": 7.390121180904763e-06, + "loss": 0.2208, + "step": 20805, + "teacher_loss": 0.21664166450500488 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.5697411298751831, + "learning_rate": 7.388164008102926e-06, + "loss": 0.4947, + "step": 20806, + "teacher_loss": 0.48634475469589233 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.5164893269538879, + "learning_rate": 7.38620700981766e-06, + "loss": 0.2506, + "step": 20807, + "teacher_loss": 0.2210976928472519 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.28908205032348633, + "learning_rate": 7.384250186093841e-06, + "loss": 0.1901, + "step": 20808, + "teacher_loss": 0.17913052439689636 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.6624298095703125, + "learning_rate": 7.3822935369763375e-06, + "loss": 0.2286, + "step": 20809, + "teacher_loss": 0.1804494559764862 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.28073588013648987, + "learning_rate": 7.380337062509998e-06, + "loss": 0.2653, + "step": 20810, + "teacher_loss": 0.26360517740249634 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.349109947681427, + "learning_rate": 7.378380762739685e-06, + "loss": 0.1806, + "step": 20811, + "teacher_loss": 0.1619122326374054 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.2896420955657959, + "learning_rate": 7.376424637710257e-06, + "loss": 0.2882, + "step": 20812, + "teacher_loss": 0.28800883889198303 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.5932540893554688, + "learning_rate": 7.374468687466548e-06, + "loss": 0.2707, + "step": 20813, + "teacher_loss": 0.2348783016204834 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.7519567012786865, + "learning_rate": 7.372512912053411e-06, + "loss": 0.2854, + "step": 20814, + "teacher_loss": 0.23355445265769958 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.17227578163146973, + "learning_rate": 7.370557311515689e-06, + "loss": 0.1767, + "step": 20815, + "teacher_loss": 0.17721986770629883 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.5862759351730347, + "learning_rate": 7.368601885898208e-06, + "loss": 0.367, + "step": 20816, + "teacher_loss": 0.34264010190963745 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.19032594561576843, + "learning_rate": 7.366646635245812e-06, + "loss": 0.17, + "step": 20817, + "teacher_loss": 0.1677112877368927 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.40794655680656433, + "learning_rate": 7.3646915596033165e-06, + "loss": 0.2471, + "step": 20818, + "teacher_loss": 0.2292812317609787 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.45370203256607056, + "learning_rate": 7.3627366590155515e-06, + "loss": 0.2238, + "step": 20819, + "teacher_loss": 0.19825172424316406 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.6985903978347778, + "learning_rate": 7.360781933527343e-06, + "loss": 0.3495, + "step": 20820, + "teacher_loss": 0.31075701117515564 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.310874879360199, + "learning_rate": 7.358827383183497e-06, + "loss": 0.1894, + "step": 20821, + "teacher_loss": 0.17586715519428253 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.24492311477661133, + "learning_rate": 7.356873008028834e-06, + "loss": 0.2219, + "step": 20822, + "teacher_loss": 0.2193339616060257 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.2357134222984314, + "learning_rate": 7.354918808108152e-06, + "loss": 0.1816, + "step": 20823, + "teacher_loss": 0.17560246586799622 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.5981073379516602, + "learning_rate": 7.352964783466261e-06, + "loss": 0.3156, + "step": 20824, + "teacher_loss": 0.2841559946537018 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.23481610417366028, + "learning_rate": 7.351010934147965e-06, + "loss": 0.2163, + "step": 20825, + "teacher_loss": 0.214248925447464 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.4718189835548401, + "learning_rate": 7.34905726019805e-06, + "loss": 0.2146, + "step": 20826, + "teacher_loss": 0.18602976202964783 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.46568188071250916, + "learning_rate": 7.34710376166132e-06, + "loss": 0.2754, + "step": 20827, + "teacher_loss": 0.25426435470581055 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.5828574299812317, + "learning_rate": 7.34515043858255e-06, + "loss": 0.2448, + "step": 20828, + "teacher_loss": 0.20720547437667847 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.41344600915908813, + "learning_rate": 7.343197291006531e-06, + "loss": 0.2334, + "step": 20829, + "teacher_loss": 0.21338143944740295 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.43301451206207275, + "learning_rate": 7.341244318978046e-06, + "loss": 0.2463, + "step": 20830, + "teacher_loss": 0.2255607545375824 + }, + { + "compression_loss": 0.0, + "epoch": 3.76, + "label_loss": 0.36874908208847046, + "learning_rate": 7.339291522541861e-06, + "loss": 0.2803, + "step": 20831, + "teacher_loss": 0.2705221176147461 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.22135399281978607, + "learning_rate": 7.337338901742754e-06, + "loss": 0.1627, + "step": 20832, + "teacher_loss": 0.15615665912628174 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.6512225866317749, + "learning_rate": 7.3353864566254975e-06, + "loss": 0.2871, + "step": 20833, + "teacher_loss": 0.2466907501220703 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.24554145336151123, + "learning_rate": 7.3334341872348445e-06, + "loss": 0.2228, + "step": 20834, + "teacher_loss": 0.22029517590999603 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.4795461595058441, + "learning_rate": 7.33148209361556e-06, + "loss": 0.4373, + "step": 20835, + "teacher_loss": 0.4326130151748657 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 1.1043647527694702, + "learning_rate": 7.329530175812406e-06, + "loss": 0.3464, + "step": 20836, + "teacher_loss": 0.2621748447418213 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.8489279747009277, + "learning_rate": 7.327578433870121e-06, + "loss": 0.2893, + "step": 20837, + "teacher_loss": 0.22709746658802032 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.48972082138061523, + "learning_rate": 7.32562686783346e-06, + "loss": 0.3263, + "step": 20838, + "teacher_loss": 0.30818068981170654 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.37717700004577637, + "learning_rate": 7.323675477747171e-06, + "loss": 0.2243, + "step": 20839, + "teacher_loss": 0.20733988285064697 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.3475998044013977, + "learning_rate": 7.321724263655989e-06, + "loss": 0.2944, + "step": 20840, + "teacher_loss": 0.2885274887084961 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.09062592685222626, + "learning_rate": 7.319773225604638e-06, + "loss": 0.1162, + "step": 20841, + "teacher_loss": 0.11907285451889038 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.7083640098571777, + "learning_rate": 7.317822363637872e-06, + "loss": 0.2279, + "step": 20842, + "teacher_loss": 0.17451521754264832 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.29006364941596985, + "learning_rate": 7.315871677800406e-06, + "loss": 0.2013, + "step": 20843, + "teacher_loss": 0.19147752225399017 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.34421366453170776, + "learning_rate": 7.3139211681369586e-06, + "loss": 0.2004, + "step": 20844, + "teacher_loss": 0.18447571992874146 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.600397527217865, + "learning_rate": 7.311970834692255e-06, + "loss": 0.2618, + "step": 20845, + "teacher_loss": 0.22417312860488892 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.6416093111038208, + "learning_rate": 7.3100206775110165e-06, + "loss": 0.3441, + "step": 20846, + "teacher_loss": 0.310992956161499 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.13265115022659302, + "learning_rate": 7.30807069663794e-06, + "loss": 0.2227, + "step": 20847, + "teacher_loss": 0.23272722959518433 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.37863844633102417, + "learning_rate": 7.306120892117743e-06, + "loss": 0.3172, + "step": 20848, + "teacher_loss": 0.3104074001312256 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.2519739866256714, + "learning_rate": 7.304171263995132e-06, + "loss": 0.2236, + "step": 20849, + "teacher_loss": 0.22041761875152588 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.3375350832939148, + "learning_rate": 7.302221812314801e-06, + "loss": 0.2161, + "step": 20850, + "teacher_loss": 0.20255626738071442 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.2439296990633011, + "learning_rate": 7.300272537121433e-06, + "loss": 0.2147, + "step": 20851, + "teacher_loss": 0.21144267916679382 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.7562127113342285, + "learning_rate": 7.2983234384597404e-06, + "loss": 0.2484, + "step": 20852, + "teacher_loss": 0.19194942712783813 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.4540614187717438, + "learning_rate": 7.2963745163744026e-06, + "loss": 0.2384, + "step": 20853, + "teacher_loss": 0.2144690752029419 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.6738770604133606, + "learning_rate": 7.294425770910088e-06, + "loss": 0.2537, + "step": 20854, + "teacher_loss": 0.20696386694908142 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.6841431856155396, + "learning_rate": 7.292477202111501e-06, + "loss": 0.3347, + "step": 20855, + "teacher_loss": 0.29586371779441833 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.4516450762748718, + "learning_rate": 7.290528810023302e-06, + "loss": 0.2078, + "step": 20856, + "teacher_loss": 0.1807451844215393 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.23400571942329407, + "learning_rate": 7.288580594690157e-06, + "loss": 0.2043, + "step": 20857, + "teacher_loss": 0.20101910829544067 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.12988528609275818, + "learning_rate": 7.28663255615674e-06, + "loss": 0.1384, + "step": 20858, + "teacher_loss": 0.13936671614646912 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.49110034108161926, + "learning_rate": 7.284684694467717e-06, + "loss": 0.222, + "step": 20859, + "teacher_loss": 0.19211995601654053 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.24366320669651031, + "learning_rate": 7.282737009667738e-06, + "loss": 0.3268, + "step": 20860, + "teacher_loss": 0.3360125422477722 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.17032060027122498, + "learning_rate": 7.280789501801461e-06, + "loss": 0.1447, + "step": 20861, + "teacher_loss": 0.14180141687393188 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.2818715572357178, + "learning_rate": 7.2788421709135445e-06, + "loss": 0.1726, + "step": 20862, + "teacher_loss": 0.16046112775802612 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.38449782133102417, + "learning_rate": 7.276895017048621e-06, + "loss": 0.255, + "step": 20863, + "teacher_loss": 0.24063386023044586 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.6231509447097778, + "learning_rate": 7.2749480402513394e-06, + "loss": 0.3675, + "step": 20864, + "teacher_loss": 0.33914023637771606 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.6110333204269409, + "learning_rate": 7.273001240566343e-06, + "loss": 0.2753, + "step": 20865, + "teacher_loss": 0.23794592916965485 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.45223790407180786, + "learning_rate": 7.271054618038264e-06, + "loss": 0.2339, + "step": 20866, + "teacher_loss": 0.20965829491615295 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.4540024995803833, + "learning_rate": 7.269108172711722e-06, + "loss": 0.2203, + "step": 20867, + "teacher_loss": 0.19429796934127808 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.40884512662887573, + "learning_rate": 7.2671619046313525e-06, + "loss": 0.2367, + "step": 20868, + "teacher_loss": 0.21762433648109436 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.2868151366710663, + "learning_rate": 7.265215813841782e-06, + "loss": 0.2914, + "step": 20869, + "teacher_loss": 0.2919510006904602 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.1320994794368744, + "learning_rate": 7.263269900387618e-06, + "loss": 0.1359, + "step": 20870, + "teacher_loss": 0.13634318113327026 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.5066434741020203, + "learning_rate": 7.261324164313477e-06, + "loss": 0.2777, + "step": 20871, + "teacher_loss": 0.252309650182724 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.2875855267047882, + "learning_rate": 7.259378605663979e-06, + "loss": 0.1866, + "step": 20872, + "teacher_loss": 0.17539288103580475 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.26620596647262573, + "learning_rate": 7.257433224483714e-06, + "loss": 0.1747, + "step": 20873, + "teacher_loss": 0.16452372074127197 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.16928622126579285, + "learning_rate": 7.255488020817293e-06, + "loss": 0.1459, + "step": 20874, + "teacher_loss": 0.14334167540073395 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 1.0552732944488525, + "learning_rate": 7.253542994709316e-06, + "loss": 0.3185, + "step": 20875, + "teacher_loss": 0.2366788387298584 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.30774903297424316, + "learning_rate": 7.251598146204371e-06, + "loss": 0.2427, + "step": 20876, + "teacher_loss": 0.2354506105184555 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.2725062072277069, + "learning_rate": 7.249653475347054e-06, + "loss": 0.2206, + "step": 20877, + "teacher_loss": 0.21486398577690125 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.4646933078765869, + "learning_rate": 7.247708982181939e-06, + "loss": 0.2537, + "step": 20878, + "teacher_loss": 0.2302594929933548 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.3467743396759033, + "learning_rate": 7.245764666753617e-06, + "loss": 0.2544, + "step": 20879, + "teacher_loss": 0.24415363371372223 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.5541468262672424, + "learning_rate": 7.243820529106667e-06, + "loss": 0.1854, + "step": 20880, + "teacher_loss": 0.14439867436885834 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.5157880187034607, + "learning_rate": 7.241876569285651e-06, + "loss": 0.2461, + "step": 20881, + "teacher_loss": 0.2161901891231537 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.3942872881889343, + "learning_rate": 7.239932787335147e-06, + "loss": 0.2899, + "step": 20882, + "teacher_loss": 0.278264582157135 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.37743228673934937, + "learning_rate": 7.237989183299724e-06, + "loss": 0.1953, + "step": 20883, + "teacher_loss": 0.1750173419713974 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.765160322189331, + "learning_rate": 7.236045757223931e-06, + "loss": 0.4015, + "step": 20884, + "teacher_loss": 0.36105063557624817 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.4167137145996094, + "learning_rate": 7.234102509152336e-06, + "loss": 0.2671, + "step": 20885, + "teacher_loss": 0.25046032667160034 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.3473365306854248, + "learning_rate": 7.232159439129484e-06, + "loss": 0.271, + "step": 20886, + "teacher_loss": 0.26248565316200256 + }, + { + "compression_loss": 0.0, + "epoch": 3.77, + "label_loss": 0.3602268397808075, + "learning_rate": 7.230216547199925e-06, + "loss": 0.19, + "step": 20887, + "teacher_loss": 0.17105732858181 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.5267833471298218, + "learning_rate": 7.228273833408211e-06, + "loss": 0.2156, + "step": 20888, + "teacher_loss": 0.18102025985717773 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.6122809052467346, + "learning_rate": 7.226331297798872e-06, + "loss": 0.2963, + "step": 20889, + "teacher_loss": 0.261149525642395 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.51596599817276, + "learning_rate": 7.2243889404164555e-06, + "loss": 0.1937, + "step": 20890, + "teacher_loss": 0.15793634951114655 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.38295289874076843, + "learning_rate": 7.222446761305483e-06, + "loss": 0.2461, + "step": 20891, + "teacher_loss": 0.2308589220046997 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.2524401843547821, + "learning_rate": 7.2205047605104865e-06, + "loss": 0.1371, + "step": 20892, + "teacher_loss": 0.12423904240131378 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.4336429238319397, + "learning_rate": 7.218562938075999e-06, + "loss": 0.2132, + "step": 20893, + "teacher_loss": 0.18868887424468994 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.3345625400543213, + "learning_rate": 7.216621294046527e-06, + "loss": 0.2492, + "step": 20894, + "teacher_loss": 0.23971731960773468 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.742740273475647, + "learning_rate": 7.214679828466593e-06, + "loss": 0.3532, + "step": 20895, + "teacher_loss": 0.3098759055137634 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.2610739469528198, + "learning_rate": 7.212738541380714e-06, + "loss": 0.2035, + "step": 20896, + "teacher_loss": 0.19704784452915192 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.6191931962966919, + "learning_rate": 7.210797432833388e-06, + "loss": 0.2289, + "step": 20897, + "teacher_loss": 0.18557915091514587 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.9858016967773438, + "learning_rate": 7.208856502869122e-06, + "loss": 0.3076, + "step": 20898, + "teacher_loss": 0.23226192593574524 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.6767506003379822, + "learning_rate": 7.206915751532425e-06, + "loss": 0.5641, + "step": 20899, + "teacher_loss": 0.551571786403656 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.24747434258460999, + "learning_rate": 7.204975178867783e-06, + "loss": 0.1922, + "step": 20900, + "teacher_loss": 0.18601638078689575 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.49998316168785095, + "learning_rate": 7.203034784919681e-06, + "loss": 0.2239, + "step": 20901, + "teacher_loss": 0.1931706666946411 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.6947510242462158, + "learning_rate": 7.201094569732623e-06, + "loss": 0.2851, + "step": 20902, + "teacher_loss": 0.23953962326049805 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.33247101306915283, + "learning_rate": 7.199154533351086e-06, + "loss": 0.2172, + "step": 20903, + "teacher_loss": 0.20434662699699402 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.3905603587627411, + "learning_rate": 7.197214675819536e-06, + "loss": 0.2172, + "step": 20904, + "teacher_loss": 0.19794002175331116 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.601241409778595, + "learning_rate": 7.1952749971824714e-06, + "loss": 0.2237, + "step": 20905, + "teacher_loss": 0.1817316710948944 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.7119176983833313, + "learning_rate": 7.193335497484349e-06, + "loss": 0.2628, + "step": 20906, + "teacher_loss": 0.21287992596626282 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.6558371782302856, + "learning_rate": 7.1913961767696344e-06, + "loss": 0.2668, + "step": 20907, + "teacher_loss": 0.22353026270866394 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.5078572034835815, + "learning_rate": 7.189457035082794e-06, + "loss": 0.2156, + "step": 20908, + "teacher_loss": 0.18311774730682373 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.8642302751541138, + "learning_rate": 7.187518072468291e-06, + "loss": 0.2797, + "step": 20909, + "teacher_loss": 0.2147475779056549 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.4342668056488037, + "learning_rate": 7.185579288970571e-06, + "loss": 0.176, + "step": 20910, + "teacher_loss": 0.1472967565059662 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.4895259737968445, + "learning_rate": 7.183640684634089e-06, + "loss": 0.2315, + "step": 20911, + "teacher_loss": 0.20279614627361298 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.49807092547416687, + "learning_rate": 7.181702259503296e-06, + "loss": 0.2732, + "step": 20912, + "teacher_loss": 0.2482655942440033 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 1.2886818647384644, + "learning_rate": 7.179764013622631e-06, + "loss": 0.3024, + "step": 20913, + "teacher_loss": 0.19279402494430542 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.711980402469635, + "learning_rate": 7.1778259470365185e-06, + "loss": 0.2405, + "step": 20914, + "teacher_loss": 0.18815943598747253 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.15923148393630981, + "learning_rate": 7.175888059789418e-06, + "loss": 0.1291, + "step": 20915, + "teacher_loss": 0.12570597231388092 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.5589304566383362, + "learning_rate": 7.173950351925746e-06, + "loss": 0.3095, + "step": 20916, + "teacher_loss": 0.28174859285354614 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.6440385580062866, + "learning_rate": 7.172012823489921e-06, + "loss": 0.2838, + "step": 20917, + "teacher_loss": 0.24373877048492432 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.6720754504203796, + "learning_rate": 7.170075474526373e-06, + "loss": 0.2991, + "step": 20918, + "teacher_loss": 0.2576434016227722 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.6613928079605103, + "learning_rate": 7.1681383050795275e-06, + "loss": 0.2648, + "step": 20919, + "teacher_loss": 0.22068515419960022 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.5318841338157654, + "learning_rate": 7.166201315193781e-06, + "loss": 0.2313, + "step": 20920, + "teacher_loss": 0.1978864073753357 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.6168327927589417, + "learning_rate": 7.164264504913552e-06, + "loss": 0.2754, + "step": 20921, + "teacher_loss": 0.23745819926261902 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.32774221897125244, + "learning_rate": 7.162327874283251e-06, + "loss": 0.1931, + "step": 20922, + "teacher_loss": 0.17813856899738312 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.22952650487422943, + "learning_rate": 7.160391423347267e-06, + "loss": 0.1566, + "step": 20923, + "teacher_loss": 0.14851021766662598 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.2600582242012024, + "learning_rate": 7.158455152150004e-06, + "loss": 0.1664, + "step": 20924, + "teacher_loss": 0.15599417686462402 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.3207114040851593, + "learning_rate": 7.156519060735861e-06, + "loss": 0.2424, + "step": 20925, + "teacher_loss": 0.23366346955299377 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.20522890985012054, + "learning_rate": 7.154583149149212e-06, + "loss": 0.3245, + "step": 20926, + "teacher_loss": 0.3377407193183899 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.5513725280761719, + "learning_rate": 7.152647417434456e-06, + "loss": 0.3456, + "step": 20927, + "teacher_loss": 0.3227009177207947 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.2717287838459015, + "learning_rate": 7.150711865635962e-06, + "loss": 0.2548, + "step": 20928, + "teacher_loss": 0.25286778807640076 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.8975619077682495, + "learning_rate": 7.148776493798118e-06, + "loss": 0.2593, + "step": 20929, + "teacher_loss": 0.188373863697052 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.29291635751724243, + "learning_rate": 7.146841301965284e-06, + "loss": 0.185, + "step": 20930, + "teacher_loss": 0.17304736375808716 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.44363588094711304, + "learning_rate": 7.144906290181832e-06, + "loss": 0.201, + "step": 20931, + "teacher_loss": 0.174034982919693 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.5962947010993958, + "learning_rate": 7.142971458492136e-06, + "loss": 0.1849, + "step": 20932, + "teacher_loss": 0.13924050331115723 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.5456302165985107, + "learning_rate": 7.14103680694054e-06, + "loss": 0.3215, + "step": 20933, + "teacher_loss": 0.29663681983947754 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.1243998259305954, + "learning_rate": 7.139102335571409e-06, + "loss": 0.1244, + "step": 20934, + "teacher_loss": 0.1244472861289978 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.2211330085992813, + "learning_rate": 7.137168044429099e-06, + "loss": 0.1481, + "step": 20935, + "teacher_loss": 0.13996800780296326 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.6663065552711487, + "learning_rate": 7.135233933557945e-06, + "loss": 0.3028, + "step": 20936, + "teacher_loss": 0.2623865008354187 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.5568263530731201, + "learning_rate": 7.133300003002298e-06, + "loss": 0.2409, + "step": 20937, + "teacher_loss": 0.20585143566131592 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.6896703839302063, + "learning_rate": 7.131366252806501e-06, + "loss": 0.2174, + "step": 20938, + "teacher_loss": 0.1649707555770874 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.4019356369972229, + "learning_rate": 7.12943268301488e-06, + "loss": 0.2026, + "step": 20939, + "teacher_loss": 0.180498868227005 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.4665055572986603, + "learning_rate": 7.127499293671776e-06, + "loss": 0.2278, + "step": 20940, + "teacher_loss": 0.20129835605621338 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.7757083773612976, + "learning_rate": 7.125566084821504e-06, + "loss": 0.2774, + "step": 20941, + "teacher_loss": 0.2219896763563156 + }, + { + "compression_loss": 0.0, + "epoch": 3.78, + "label_loss": 0.9921225309371948, + "learning_rate": 7.123633056508393e-06, + "loss": 0.3373, + "step": 20942, + "teacher_loss": 0.2645341753959656 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.2844240665435791, + "learning_rate": 7.1217002087767655e-06, + "loss": 0.2403, + "step": 20943, + "teacher_loss": 0.23539794981479645 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.44497138261795044, + "learning_rate": 7.119767541670928e-06, + "loss": 0.1854, + "step": 20944, + "teacher_loss": 0.15654712915420532 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 1.0019617080688477, + "learning_rate": 7.117835055235195e-06, + "loss": 0.3468, + "step": 20945, + "teacher_loss": 0.2740297019481659 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.6010870933532715, + "learning_rate": 7.115902749513877e-06, + "loss": 0.2205, + "step": 20946, + "teacher_loss": 0.17817196249961853 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.6027884483337402, + "learning_rate": 7.113970624551266e-06, + "loss": 0.33, + "step": 20947, + "teacher_loss": 0.29969367384910583 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.33525532484054565, + "learning_rate": 7.112038680391671e-06, + "loss": 0.2524, + "step": 20948, + "teacher_loss": 0.24323342740535736 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.4300152063369751, + "learning_rate": 7.110106917079373e-06, + "loss": 0.3034, + "step": 20949, + "teacher_loss": 0.28936296701431274 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.813190758228302, + "learning_rate": 7.108175334658675e-06, + "loss": 0.3276, + "step": 20950, + "teacher_loss": 0.2736360430717468 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.27036961913108826, + "learning_rate": 7.106243933173849e-06, + "loss": 0.1644, + "step": 20951, + "teacher_loss": 0.1526794284582138 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.4308510422706604, + "learning_rate": 7.104312712669183e-06, + "loss": 0.2561, + "step": 20952, + "teacher_loss": 0.23672431707382202 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.33989912271499634, + "learning_rate": 7.102381673188961e-06, + "loss": 0.2492, + "step": 20953, + "teacher_loss": 0.23914164304733276 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.5601613521575928, + "learning_rate": 7.100450814777443e-06, + "loss": 0.2172, + "step": 20954, + "teacher_loss": 0.1790561079978943 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.22413137555122375, + "learning_rate": 7.098520137478902e-06, + "loss": 0.1791, + "step": 20955, + "teacher_loss": 0.17414340376853943 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.5522635579109192, + "learning_rate": 7.096589641337611e-06, + "loss": 0.3217, + "step": 20956, + "teacher_loss": 0.2960282862186432 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.29903745651245117, + "learning_rate": 7.094659326397818e-06, + "loss": 0.2091, + "step": 20957, + "teacher_loss": 0.1991077959537506 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.3318334221839905, + "learning_rate": 7.092729192703786e-06, + "loss": 0.1981, + "step": 20958, + "teacher_loss": 0.1832163780927658 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.14552034437656403, + "learning_rate": 7.0907992402997715e-06, + "loss": 0.1534, + "step": 20959, + "teacher_loss": 0.1542649269104004 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.23334616422653198, + "learning_rate": 7.088869469230013e-06, + "loss": 0.1518, + "step": 20960, + "teacher_loss": 0.1427372395992279 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.9505484104156494, + "learning_rate": 7.086939879538756e-06, + "loss": 0.3765, + "step": 20961, + "teacher_loss": 0.3126862943172455 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.28648293018341064, + "learning_rate": 7.085010471270252e-06, + "loss": 0.1921, + "step": 20962, + "teacher_loss": 0.1816261112689972 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.4025924801826477, + "learning_rate": 7.083081244468725e-06, + "loss": 0.2165, + "step": 20963, + "teacher_loss": 0.1958579421043396 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.4363923668861389, + "learning_rate": 7.081152199178399e-06, + "loss": 0.2188, + "step": 20964, + "teacher_loss": 0.19462257623672485 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.39485037326812744, + "learning_rate": 7.07922333544352e-06, + "loss": 0.222, + "step": 20965, + "teacher_loss": 0.2027878761291504 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.376198947429657, + "learning_rate": 7.077294653308305e-06, + "loss": 0.249, + "step": 20966, + "teacher_loss": 0.23481214046478271 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.34262073040008545, + "learning_rate": 7.075366152816963e-06, + "loss": 0.2261, + "step": 20967, + "teacher_loss": 0.21318262815475464 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.6181092262268066, + "learning_rate": 7.0734378340137166e-06, + "loss": 0.259, + "step": 20968, + "teacher_loss": 0.2190513014793396 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.41529470682144165, + "learning_rate": 7.071509696942781e-06, + "loss": 0.235, + "step": 20969, + "teacher_loss": 0.2150205671787262 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.5941544771194458, + "learning_rate": 7.069581741648351e-06, + "loss": 0.2399, + "step": 20970, + "teacher_loss": 0.2004947066307068 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.397749662399292, + "learning_rate": 7.067653968174636e-06, + "loss": 0.3289, + "step": 20971, + "teacher_loss": 0.3212510347366333 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.5265370011329651, + "learning_rate": 7.065726376565839e-06, + "loss": 0.2372, + "step": 20972, + "teacher_loss": 0.20500922203063965 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.22794246673583984, + "learning_rate": 7.063798966866147e-06, + "loss": 0.1873, + "step": 20973, + "teacher_loss": 0.1827775537967682 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.41547369956970215, + "learning_rate": 7.06187173911974e-06, + "loss": 0.2052, + "step": 20974, + "teacher_loss": 0.18178215622901917 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.48197731375694275, + "learning_rate": 7.059944693370825e-06, + "loss": 0.2123, + "step": 20975, + "teacher_loss": 0.18234783411026 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.15815985202789307, + "learning_rate": 7.058017829663573e-06, + "loss": 0.1688, + "step": 20976, + "teacher_loss": 0.17002446949481964 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.270396888256073, + "learning_rate": 7.05609114804215e-06, + "loss": 0.2024, + "step": 20977, + "teacher_loss": 0.19486625492572784 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.705931544303894, + "learning_rate": 7.054164648550749e-06, + "loss": 0.276, + "step": 20978, + "teacher_loss": 0.22827589511871338 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.2824687659740448, + "learning_rate": 7.052238331233533e-06, + "loss": 0.2089, + "step": 20979, + "teacher_loss": 0.20077916979789734 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.9517744183540344, + "learning_rate": 7.050312196134655e-06, + "loss": 0.3425, + "step": 20980, + "teacher_loss": 0.2747650742530823 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.36999624967575073, + "learning_rate": 7.048386243298286e-06, + "loss": 0.1804, + "step": 20981, + "teacher_loss": 0.15937253832817078 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.44541680812835693, + "learning_rate": 7.046460472768586e-06, + "loss": 0.1996, + "step": 20982, + "teacher_loss": 0.1723167598247528 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.6684302091598511, + "learning_rate": 7.044534884589694e-06, + "loss": 0.294, + "step": 20983, + "teacher_loss": 0.2524460554122925 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.4397996664047241, + "learning_rate": 7.042609478805766e-06, + "loss": 0.1994, + "step": 20984, + "teacher_loss": 0.1727360188961029 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.5474628806114197, + "learning_rate": 7.0406842554609515e-06, + "loss": 0.3218, + "step": 20985, + "teacher_loss": 0.29667186737060547 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.5723136067390442, + "learning_rate": 7.038759214599378e-06, + "loss": 0.2689, + "step": 20986, + "teacher_loss": 0.23519571125507355 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.24437585473060608, + "learning_rate": 7.036834356265186e-06, + "loss": 0.2143, + "step": 20987, + "teacher_loss": 0.21095268428325653 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.38687533140182495, + "learning_rate": 7.034909680502514e-06, + "loss": 0.1942, + "step": 20988, + "teacher_loss": 0.17277246713638306 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.27932319045066833, + "learning_rate": 7.032985187355481e-06, + "loss": 0.193, + "step": 20989, + "teacher_loss": 0.18339186906814575 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.4516412913799286, + "learning_rate": 7.0310608768682065e-06, + "loss": 0.2218, + "step": 20990, + "teacher_loss": 0.19622871279716492 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.32434624433517456, + "learning_rate": 7.029136749084815e-06, + "loss": 0.1787, + "step": 20991, + "teacher_loss": 0.16256055235862732 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.517440676689148, + "learning_rate": 7.027212804049424e-06, + "loss": 0.253, + "step": 20992, + "teacher_loss": 0.223586767911911 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.5337303876876831, + "learning_rate": 7.025289041806134e-06, + "loss": 0.3084, + "step": 20993, + "teacher_loss": 0.2833779454231262 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.5473419427871704, + "learning_rate": 7.023365462399058e-06, + "loss": 0.2054, + "step": 20994, + "teacher_loss": 0.1674402356147766 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.27413833141326904, + "learning_rate": 7.021442065872303e-06, + "loss": 0.2013, + "step": 20995, + "teacher_loss": 0.19316229224205017 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.2361699640750885, + "learning_rate": 7.019518852269953e-06, + "loss": 0.1616, + "step": 20996, + "teacher_loss": 0.15330219268798828 + }, + { + "compression_loss": 0.0, + "epoch": 3.79, + "label_loss": 0.8159303069114685, + "learning_rate": 7.017595821636109e-06, + "loss": 0.3536, + "step": 20997, + "teacher_loss": 0.30223768949508667 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.5530143976211548, + "learning_rate": 7.015672974014867e-06, + "loss": 0.1947, + "step": 20998, + "teacher_loss": 0.15485063195228577 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.3127064108848572, + "learning_rate": 7.013750309450299e-06, + "loss": 0.1861, + "step": 20999, + "teacher_loss": 0.17207646369934082 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.17733411490917206, + "learning_rate": 7.011827827986499e-06, + "loss": 0.1373, + "step": 21000, + "teacher_loss": 0.13282468914985657 + }, + { + "epoch": 3.8, + "eval_exact_match": 80.09460737937559, + "eval_f1": 87.52990201813554, + "step": 21000 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.6839249134063721, + "learning_rate": 7.009905529667531e-06, + "loss": 0.2579, + "step": 21001, + "teacher_loss": 0.21056047081947327 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.5756430625915527, + "learning_rate": 7.0079834145374744e-06, + "loss": 0.2302, + "step": 21002, + "teacher_loss": 0.1918008029460907 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.7244046926498413, + "learning_rate": 7.006061482640402e-06, + "loss": 0.3214, + "step": 21003, + "teacher_loss": 0.2765883207321167 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.2491282969713211, + "learning_rate": 7.004139734020366e-06, + "loss": 0.2171, + "step": 21004, + "teacher_loss": 0.21359427273273468 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.22341501712799072, + "learning_rate": 7.0022181687214335e-06, + "loss": 0.1711, + "step": 21005, + "teacher_loss": 0.16524332761764526 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.408224493265152, + "learning_rate": 7.000296786787666e-06, + "loss": 0.2195, + "step": 21006, + "teacher_loss": 0.19849984347820282 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.4759189188480377, + "learning_rate": 6.9983755882631026e-06, + "loss": 0.2023, + "step": 21007, + "teacher_loss": 0.17186099290847778 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.5682344436645508, + "learning_rate": 6.996454573191799e-06, + "loss": 0.2976, + "step": 21008, + "teacher_loss": 0.2675284743309021 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.37052714824676514, + "learning_rate": 6.994533741617799e-06, + "loss": 0.2041, + "step": 21009, + "teacher_loss": 0.18565037846565247 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.23669889569282532, + "learning_rate": 6.992613093585134e-06, + "loss": 0.2232, + "step": 21010, + "teacher_loss": 0.221745103597641 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.3380270302295685, + "learning_rate": 6.990692629137849e-06, + "loss": 0.1864, + "step": 21011, + "teacher_loss": 0.169529989361763 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.3921695351600647, + "learning_rate": 6.988772348319964e-06, + "loss": 0.2546, + "step": 21012, + "teacher_loss": 0.23934721946716309 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.13165238499641418, + "learning_rate": 6.986852251175515e-06, + "loss": 0.1394, + "step": 21013, + "teacher_loss": 0.1402832269668579 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.47960513830184937, + "learning_rate": 6.984932337748513e-06, + "loss": 0.3595, + "step": 21014, + "teacher_loss": 0.3461185097694397 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.6147972345352173, + "learning_rate": 6.983012608082983e-06, + "loss": 0.2809, + "step": 21015, + "teacher_loss": 0.24375228583812714 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.6853942275047302, + "learning_rate": 6.981093062222944e-06, + "loss": 0.2826, + "step": 21016, + "teacher_loss": 0.23787739872932434 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.14575828611850739, + "learning_rate": 6.979173700212393e-06, + "loss": 0.1684, + "step": 21017, + "teacher_loss": 0.170942023396492 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.23127904534339905, + "learning_rate": 6.977254522095341e-06, + "loss": 0.2312, + "step": 21018, + "teacher_loss": 0.23120540380477905 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.5682312846183777, + "learning_rate": 6.975335527915794e-06, + "loss": 0.2887, + "step": 21019, + "teacher_loss": 0.2575920820236206 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.4151148498058319, + "learning_rate": 6.97341671771774e-06, + "loss": 0.2517, + "step": 21020, + "teacher_loss": 0.23349085450172424 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.3168506622314453, + "learning_rate": 6.971498091545174e-06, + "loss": 0.2338, + "step": 21021, + "teacher_loss": 0.22458340227603912 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.6118311285972595, + "learning_rate": 6.969579649442094e-06, + "loss": 0.297, + "step": 21022, + "teacher_loss": 0.2620731592178345 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.7122552394866943, + "learning_rate": 6.9676613914524744e-06, + "loss": 0.2682, + "step": 21023, + "teacher_loss": 0.21887817978858948 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.3397955298423767, + "learning_rate": 6.965743317620288e-06, + "loss": 0.2172, + "step": 21024, + "teacher_loss": 0.20352382957935333 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.4656582474708557, + "learning_rate": 6.963825427989529e-06, + "loss": 0.2071, + "step": 21025, + "teacher_loss": 0.17833131551742554 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.4977899193763733, + "learning_rate": 6.961907722604158e-06, + "loss": 0.2538, + "step": 21026, + "teacher_loss": 0.22672194242477417 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.623198926448822, + "learning_rate": 6.959990201508133e-06, + "loss": 0.2364, + "step": 21027, + "teacher_loss": 0.19347065687179565 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.15967178344726562, + "learning_rate": 6.958072864745441e-06, + "loss": 0.1182, + "step": 21028, + "teacher_loss": 0.11361486464738846 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.26478299498558044, + "learning_rate": 6.9561557123600244e-06, + "loss": 0.1845, + "step": 21029, + "teacher_loss": 0.17555615305900574 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.6103314161300659, + "learning_rate": 6.9542387443958365e-06, + "loss": 0.2461, + "step": 21030, + "teacher_loss": 0.20566898584365845 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.31152522563934326, + "learning_rate": 6.95232196089683e-06, + "loss": 0.2233, + "step": 21031, + "teacher_loss": 0.21348586678504944 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.5567577481269836, + "learning_rate": 6.95040536190696e-06, + "loss": 0.2002, + "step": 21032, + "teacher_loss": 0.16059619188308716 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.49905839562416077, + "learning_rate": 6.948488947470156e-06, + "loss": 0.243, + "step": 21033, + "teacher_loss": 0.2146032154560089 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.5236203670501709, + "learning_rate": 6.94657271763036e-06, + "loss": 0.3276, + "step": 21034, + "teacher_loss": 0.3058511018753052 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.5662654042243958, + "learning_rate": 6.944656672431512e-06, + "loss": 0.2389, + "step": 21035, + "teacher_loss": 0.20250052213668823 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.29981210827827454, + "learning_rate": 6.942740811917535e-06, + "loss": 0.2146, + "step": 21036, + "teacher_loss": 0.20509478449821472 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.19058065116405487, + "learning_rate": 6.940825136132342e-06, + "loss": 0.2701, + "step": 21037, + "teacher_loss": 0.27888914942741394 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.443927526473999, + "learning_rate": 6.938909645119879e-06, + "loss": 0.2009, + "step": 21038, + "teacher_loss": 0.17391954362392426 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.49460023641586304, + "learning_rate": 6.936994338924049e-06, + "loss": 0.234, + "step": 21039, + "teacher_loss": 0.20503324270248413 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.19955509901046753, + "learning_rate": 6.9350792175887576e-06, + "loss": 0.1589, + "step": 21040, + "teacher_loss": 0.1543952226638794 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.6149266362190247, + "learning_rate": 6.933164281157922e-06, + "loss": 0.2972, + "step": 21041, + "teacher_loss": 0.2619290351867676 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.5166228413581848, + "learning_rate": 6.931249529675449e-06, + "loss": 0.2044, + "step": 21042, + "teacher_loss": 0.16966111958026886 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.739244282245636, + "learning_rate": 6.9293349631852255e-06, + "loss": 0.2731, + "step": 21043, + "teacher_loss": 0.22129297256469727 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.26722463965415955, + "learning_rate": 6.927420581731155e-06, + "loss": 0.2073, + "step": 21044, + "teacher_loss": 0.2006259560585022 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.3457545042037964, + "learning_rate": 6.925506385357135e-06, + "loss": 0.2181, + "step": 21045, + "teacher_loss": 0.20396915078163147 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.3232949674129486, + "learning_rate": 6.923592374107038e-06, + "loss": 0.2023, + "step": 21046, + "teacher_loss": 0.1888878047466278 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.12580648064613342, + "learning_rate": 6.921678548024754e-06, + "loss": 0.1572, + "step": 21047, + "teacher_loss": 0.1607227921485901 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.4828297793865204, + "learning_rate": 6.919764907154165e-06, + "loss": 0.2346, + "step": 21048, + "teacher_loss": 0.20697534084320068 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.4111780524253845, + "learning_rate": 6.917851451539137e-06, + "loss": 0.1784, + "step": 21049, + "teacher_loss": 0.15252187848091125 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.4990905523300171, + "learning_rate": 6.9159381812235486e-06, + "loss": 0.2628, + "step": 21050, + "teacher_loss": 0.23658891022205353 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.2217494249343872, + "learning_rate": 6.914025096251255e-06, + "loss": 0.1448, + "step": 21051, + "teacher_loss": 0.13627220690250397 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.7158692479133606, + "learning_rate": 6.912112196666128e-06, + "loss": 0.222, + "step": 21052, + "teacher_loss": 0.16711562871932983 + }, + { + "compression_loss": 0.0, + "epoch": 3.8, + "label_loss": 0.1085197851061821, + "learning_rate": 6.9101994825120144e-06, + "loss": 0.23, + "step": 21053, + "teacher_loss": 0.24353109300136566 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.3970978856086731, + "learning_rate": 6.908286953832772e-06, + "loss": 0.2552, + "step": 21054, + "teacher_loss": 0.23943139612674713 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.3145167827606201, + "learning_rate": 6.906374610672257e-06, + "loss": 0.1878, + "step": 21055, + "teacher_loss": 0.17368367314338684 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.610586941242218, + "learning_rate": 6.904462453074297e-06, + "loss": 0.2281, + "step": 21056, + "teacher_loss": 0.18563087284564972 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.5640156269073486, + "learning_rate": 6.902550481082743e-06, + "loss": 0.2232, + "step": 21057, + "teacher_loss": 0.18533433973789215 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.39983314275741577, + "learning_rate": 6.900638694741436e-06, + "loss": 0.2388, + "step": 21058, + "teacher_loss": 0.22095569968223572 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.5509340763092041, + "learning_rate": 6.898727094094194e-06, + "loss": 0.2533, + "step": 21059, + "teacher_loss": 0.22021010518074036 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.43815234303474426, + "learning_rate": 6.896815679184851e-06, + "loss": 0.2016, + "step": 21060, + "teacher_loss": 0.1753210723400116 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.7544982433319092, + "learning_rate": 6.894904450057234e-06, + "loss": 0.2432, + "step": 21061, + "teacher_loss": 0.18640050292015076 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.2629411220550537, + "learning_rate": 6.8929934067551535e-06, + "loss": 0.2205, + "step": 21062, + "teacher_loss": 0.21574972569942474 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.6879023909568787, + "learning_rate": 6.891082549322433e-06, + "loss": 0.2507, + "step": 21063, + "teacher_loss": 0.2021312713623047 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.5753012895584106, + "learning_rate": 6.8891718778028724e-06, + "loss": 0.1953, + "step": 21064, + "teacher_loss": 0.15304477512836456 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.3750990629196167, + "learning_rate": 6.887261392240284e-06, + "loss": 0.2342, + "step": 21065, + "teacher_loss": 0.21851582825183868 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.3156360387802124, + "learning_rate": 6.8853510926784745e-06, + "loss": 0.1818, + "step": 21066, + "teacher_loss": 0.16691815853118896 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 1.0356909036636353, + "learning_rate": 6.883440979161228e-06, + "loss": 0.4858, + "step": 21067, + "teacher_loss": 0.424721360206604 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.5096825361251831, + "learning_rate": 6.881531051732347e-06, + "loss": 0.2962, + "step": 21068, + "teacher_loss": 0.2725079655647278 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.7660454511642456, + "learning_rate": 6.879621310435622e-06, + "loss": 0.2488, + "step": 21069, + "teacher_loss": 0.1913796067237854 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.5489686727523804, + "learning_rate": 6.87771175531483e-06, + "loss": 0.2832, + "step": 21070, + "teacher_loss": 0.2536696195602417 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.999770998954773, + "learning_rate": 6.8758023864137555e-06, + "loss": 0.3899, + "step": 21071, + "teacher_loss": 0.3221236765384674 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.5881187319755554, + "learning_rate": 6.8738932037761816e-06, + "loss": 0.4927, + "step": 21072, + "teacher_loss": 0.4821445941925049 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.4585700035095215, + "learning_rate": 6.871984207445872e-06, + "loss": 0.2915, + "step": 21073, + "teacher_loss": 0.27295318245887756 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.5144942402839661, + "learning_rate": 6.870075397466588e-06, + "loss": 0.2786, + "step": 21074, + "teacher_loss": 0.25237247347831726 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.6914947628974915, + "learning_rate": 6.868166773882103e-06, + "loss": 0.2414, + "step": 21075, + "teacher_loss": 0.19138209521770477 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.204611673951149, + "learning_rate": 6.866258336736178e-06, + "loss": 0.1546, + "step": 21076, + "teacher_loss": 0.14908358454704285 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.3015681505203247, + "learning_rate": 6.864350086072556e-06, + "loss": 0.2178, + "step": 21077, + "teacher_loss": 0.20846150815486908 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.26221543550491333, + "learning_rate": 6.862442021934998e-06, + "loss": 0.2108, + "step": 21078, + "teacher_loss": 0.20504575967788696 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.957612156867981, + "learning_rate": 6.860534144367249e-06, + "loss": 0.513, + "step": 21079, + "teacher_loss": 0.4636082053184509 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.36956244707107544, + "learning_rate": 6.858626453413045e-06, + "loss": 0.2305, + "step": 21080, + "teacher_loss": 0.21506652235984802 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.9125055074691772, + "learning_rate": 6.856718949116127e-06, + "loss": 0.2884, + "step": 21081, + "teacher_loss": 0.21903762221336365 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.24365290999412537, + "learning_rate": 6.854811631520235e-06, + "loss": 0.1498, + "step": 21082, + "teacher_loss": 0.13938620686531067 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.363231897354126, + "learning_rate": 6.852904500669085e-06, + "loss": 0.2, + "step": 21083, + "teacher_loss": 0.18183794617652893 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.4659852385520935, + "learning_rate": 6.850997556606409e-06, + "loss": 0.1745, + "step": 21084, + "teacher_loss": 0.14213553071022034 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.5907101631164551, + "learning_rate": 6.849090799375931e-06, + "loss": 0.2683, + "step": 21085, + "teacher_loss": 0.23244041204452515 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.3744877278804779, + "learning_rate": 6.847184229021365e-06, + "loss": 0.2118, + "step": 21086, + "teacher_loss": 0.19376710057258606 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.21573379635810852, + "learning_rate": 6.8452778455864106e-06, + "loss": 0.2398, + "step": 21087, + "teacher_loss": 0.2424522042274475 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.13263298571109772, + "learning_rate": 6.843371649114797e-06, + "loss": 0.2118, + "step": 21088, + "teacher_loss": 0.22055436670780182 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.09606871008872986, + "learning_rate": 6.8414656396502156e-06, + "loss": 0.1418, + "step": 21089, + "teacher_loss": 0.1468939185142517 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.8944100141525269, + "learning_rate": 6.839559817236362e-06, + "loss": 0.2824, + "step": 21090, + "teacher_loss": 0.21438215672969818 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.7346954345703125, + "learning_rate": 6.8376541819169355e-06, + "loss": 0.5214, + "step": 21091, + "teacher_loss": 0.4976603388786316 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 1.1110830307006836, + "learning_rate": 6.8357487337356304e-06, + "loss": 0.6057, + "step": 21092, + "teacher_loss": 0.5496015548706055 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.31797337532043457, + "learning_rate": 6.833843472736125e-06, + "loss": 0.2363, + "step": 21093, + "teacher_loss": 0.22726817429065704 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.36460769176483154, + "learning_rate": 6.831938398962104e-06, + "loss": 0.2106, + "step": 21094, + "teacher_loss": 0.19353246688842773 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.5137042999267578, + "learning_rate": 6.830033512457255e-06, + "loss": 0.2762, + "step": 21095, + "teacher_loss": 0.24976465106010437 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.49862152338027954, + "learning_rate": 6.82812881326524e-06, + "loss": 0.2361, + "step": 21096, + "teacher_loss": 0.20689120888710022 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.4919617772102356, + "learning_rate": 6.826224301429721e-06, + "loss": 0.2545, + "step": 21097, + "teacher_loss": 0.22813186049461365 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.4095369577407837, + "learning_rate": 6.824319976994383e-06, + "loss": 0.3209, + "step": 21098, + "teacher_loss": 0.3110586404800415 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.368999719619751, + "learning_rate": 6.8224158400028765e-06, + "loss": 0.3046, + "step": 21099, + "teacher_loss": 0.29744774103164673 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.42498618364334106, + "learning_rate": 6.8205118904988455e-06, + "loss": 0.1937, + "step": 21100, + "teacher_loss": 0.16805553436279297 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.308207631111145, + "learning_rate": 6.818608128525965e-06, + "loss": 0.2073, + "step": 21101, + "teacher_loss": 0.19603359699249268 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.3554556965827942, + "learning_rate": 6.81670455412787e-06, + "loss": 0.2175, + "step": 21102, + "teacher_loss": 0.20220491290092468 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.5371696352958679, + "learning_rate": 6.814801167348201e-06, + "loss": 0.2344, + "step": 21103, + "teacher_loss": 0.20077010989189148 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.8057507872581482, + "learning_rate": 6.8128979682305995e-06, + "loss": 0.2935, + "step": 21104, + "teacher_loss": 0.23659461736679077 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.5763615369796753, + "learning_rate": 6.810994956818707e-06, + "loss": 0.2666, + "step": 21105, + "teacher_loss": 0.23222331702709198 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.5847717523574829, + "learning_rate": 6.809092133156142e-06, + "loss": 0.2309, + "step": 21106, + "teacher_loss": 0.19152754545211792 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.32749539613723755, + "learning_rate": 6.807189497286539e-06, + "loss": 0.2192, + "step": 21107, + "teacher_loss": 0.20712804794311523 + }, + { + "compression_loss": 0.0, + "epoch": 3.81, + "label_loss": 0.4948459565639496, + "learning_rate": 6.805287049253522e-06, + "loss": 0.2077, + "step": 21108, + "teacher_loss": 0.17575965821743011 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.44669681787490845, + "learning_rate": 6.8033847891006975e-06, + "loss": 0.1961, + "step": 21109, + "teacher_loss": 0.16829557716846466 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.860389232635498, + "learning_rate": 6.801482716871685e-06, + "loss": 0.3325, + "step": 21110, + "teacher_loss": 0.2738436460494995 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.589352011680603, + "learning_rate": 6.799580832610099e-06, + "loss": 0.2635, + "step": 21111, + "teacher_loss": 0.2272915542125702 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.4365181028842926, + "learning_rate": 6.797679136359534e-06, + "loss": 0.2414, + "step": 21112, + "teacher_loss": 0.21973586082458496 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.14987675845623016, + "learning_rate": 6.795777628163599e-06, + "loss": 0.2141, + "step": 21113, + "teacher_loss": 0.2212480902671814 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.38866978883743286, + "learning_rate": 6.793876308065881e-06, + "loss": 0.2111, + "step": 21114, + "teacher_loss": 0.1913265883922577 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.7936194539070129, + "learning_rate": 6.791975176109981e-06, + "loss": 0.2584, + "step": 21115, + "teacher_loss": 0.19888126850128174 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.2828065752983093, + "learning_rate": 6.790074232339476e-06, + "loss": 0.1843, + "step": 21116, + "teacher_loss": 0.17339789867401123 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.2894996404647827, + "learning_rate": 6.788173476797954e-06, + "loss": 0.1518, + "step": 21117, + "teacher_loss": 0.13650378584861755 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.3175807297229767, + "learning_rate": 6.786272909529e-06, + "loss": 0.1747, + "step": 21118, + "teacher_loss": 0.15884855389595032 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.2899169921875, + "learning_rate": 6.7843725305761746e-06, + "loss": 0.2786, + "step": 21119, + "teacher_loss": 0.27733784914016724 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.5662544965744019, + "learning_rate": 6.7824723399830575e-06, + "loss": 0.2838, + "step": 21120, + "teacher_loss": 0.2524040937423706 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.8492851257324219, + "learning_rate": 6.7805723377932166e-06, + "loss": 0.3373, + "step": 21121, + "teacher_loss": 0.2804449796676636 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.23926222324371338, + "learning_rate": 6.778672524050204e-06, + "loss": 0.1382, + "step": 21122, + "teacher_loss": 0.12692655622959137 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.39224082231521606, + "learning_rate": 6.776772898797586e-06, + "loss": 0.2975, + "step": 21123, + "teacher_loss": 0.2869310975074768 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.6006549596786499, + "learning_rate": 6.774873462078907e-06, + "loss": 0.2443, + "step": 21124, + "teacher_loss": 0.20471206307411194 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.4461630582809448, + "learning_rate": 6.7729742139377186e-06, + "loss": 0.2239, + "step": 21125, + "teacher_loss": 0.1992393285036087 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.3043396472930908, + "learning_rate": 6.77107515441757e-06, + "loss": 0.2154, + "step": 21126, + "teacher_loss": 0.20556125044822693 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.5684664249420166, + "learning_rate": 6.769176283561992e-06, + "loss": 0.2645, + "step": 21127, + "teacher_loss": 0.230694979429245 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.4666040539741516, + "learning_rate": 6.767277601414525e-06, + "loss": 0.2365, + "step": 21128, + "teacher_loss": 0.2109057903289795 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.20061102509498596, + "learning_rate": 6.765379108018704e-06, + "loss": 0.2106, + "step": 21129, + "teacher_loss": 0.2117154747247696 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.18540042638778687, + "learning_rate": 6.763480803418048e-06, + "loss": 0.1787, + "step": 21130, + "teacher_loss": 0.1780080497264862 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.14695845544338226, + "learning_rate": 6.7615826876560806e-06, + "loss": 0.1269, + "step": 21131, + "teacher_loss": 0.12461711466312408 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.3551092743873596, + "learning_rate": 6.759684760776328e-06, + "loss": 0.208, + "step": 21132, + "teacher_loss": 0.19161498546600342 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.31561899185180664, + "learning_rate": 6.757787022822293e-06, + "loss": 0.2137, + "step": 21133, + "teacher_loss": 0.20236268639564514 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.243172287940979, + "learning_rate": 6.75588947383749e-06, + "loss": 0.2175, + "step": 21134, + "teacher_loss": 0.2146616131067276 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.20968863368034363, + "learning_rate": 6.75399211386543e-06, + "loss": 0.1397, + "step": 21135, + "teacher_loss": 0.13196861743927002 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.34811481833457947, + "learning_rate": 6.752094942949608e-06, + "loss": 0.1773, + "step": 21136, + "teacher_loss": 0.15833115577697754 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.3638768196105957, + "learning_rate": 6.750197961133515e-06, + "loss": 0.2358, + "step": 21137, + "teacher_loss": 0.22155886888504028 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.20576980710029602, + "learning_rate": 6.748301168460648e-06, + "loss": 0.1577, + "step": 21138, + "teacher_loss": 0.1523124873638153 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.5612190961837769, + "learning_rate": 6.746404564974501e-06, + "loss": 0.2397, + "step": 21139, + "teacher_loss": 0.2040124088525772 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.6796547770500183, + "learning_rate": 6.744508150718547e-06, + "loss": 0.3164, + "step": 21140, + "teacher_loss": 0.2760828733444214 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.15383681654930115, + "learning_rate": 6.742611925736269e-06, + "loss": 0.1846, + "step": 21141, + "teacher_loss": 0.18803825974464417 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.17699280381202698, + "learning_rate": 6.740715890071148e-06, + "loss": 0.1369, + "step": 21142, + "teacher_loss": 0.13239236176013947 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.2258513867855072, + "learning_rate": 6.738820043766644e-06, + "loss": 0.1992, + "step": 21143, + "teacher_loss": 0.19621768593788147 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.12503355741500854, + "learning_rate": 6.736924386866228e-06, + "loss": 0.1514, + "step": 21144, + "teacher_loss": 0.15431565046310425 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.23177844285964966, + "learning_rate": 6.735028919413367e-06, + "loss": 0.1706, + "step": 21145, + "teacher_loss": 0.16376319527626038 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.35602760314941406, + "learning_rate": 6.733133641451513e-06, + "loss": 0.181, + "step": 21146, + "teacher_loss": 0.16154512763023376 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.5941517949104309, + "learning_rate": 6.7312385530241095e-06, + "loss": 0.1915, + "step": 21147, + "teacher_loss": 0.1467203050851822 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.545018196105957, + "learning_rate": 6.7293436541746254e-06, + "loss": 0.2556, + "step": 21148, + "teacher_loss": 0.22344353795051575 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.27495136857032776, + "learning_rate": 6.727448944946494e-06, + "loss": 0.164, + "step": 21149, + "teacher_loss": 0.15166683495044708 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.379241406917572, + "learning_rate": 6.725554425383146e-06, + "loss": 0.2026, + "step": 21150, + "teacher_loss": 0.18297061324119568 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.34359195828437805, + "learning_rate": 6.723660095528037e-06, + "loss": 0.197, + "step": 21151, + "teacher_loss": 0.18068063259124756 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.5772611498832703, + "learning_rate": 6.721765955424589e-06, + "loss": 0.2756, + "step": 21152, + "teacher_loss": 0.24209725856781006 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.15077432990074158, + "learning_rate": 6.719872005116221e-06, + "loss": 0.1373, + "step": 21153, + "teacher_loss": 0.1357787698507309 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.4526090621948242, + "learning_rate": 6.717978244646364e-06, + "loss": 0.1703, + "step": 21154, + "teacher_loss": 0.13896337151527405 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.3305814266204834, + "learning_rate": 6.716084674058441e-06, + "loss": 0.3119, + "step": 21155, + "teacher_loss": 0.3098183870315552 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.7844934463500977, + "learning_rate": 6.714191293395854e-06, + "loss": 0.3014, + "step": 21156, + "teacher_loss": 0.24774567782878876 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.3782028555870056, + "learning_rate": 6.7122981027020186e-06, + "loss": 0.2073, + "step": 21157, + "teacher_loss": 0.18836012482643127 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.7794773578643799, + "learning_rate": 6.710405102020346e-06, + "loss": 0.3392, + "step": 21158, + "teacher_loss": 0.29029643535614014 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.1706942617893219, + "learning_rate": 6.7085122913942295e-06, + "loss": 0.1666, + "step": 21159, + "teacher_loss": 0.166156604886055 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.5009013414382935, + "learning_rate": 6.706619670867057e-06, + "loss": 0.2085, + "step": 21160, + "teacher_loss": 0.1760426014661789 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.4840961694717407, + "learning_rate": 6.704727240482242e-06, + "loss": 0.2592, + "step": 21161, + "teacher_loss": 0.23425209522247314 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.6796861290931702, + "learning_rate": 6.70283500028316e-06, + "loss": 0.1904, + "step": 21162, + "teacher_loss": 0.13603845238685608 + }, + { + "compression_loss": 0.0, + "epoch": 3.82, + "label_loss": 0.47639307379722595, + "learning_rate": 6.70094295031319e-06, + "loss": 0.2179, + "step": 21163, + "teacher_loss": 0.18918576836585999 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.568954348564148, + "learning_rate": 6.699051090615717e-06, + "loss": 0.1934, + "step": 21164, + "teacher_loss": 0.1516387164592743 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.4993837773799896, + "learning_rate": 6.697159421234119e-06, + "loss": 0.3126, + "step": 21165, + "teacher_loss": 0.2917935252189636 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.5164945125579834, + "learning_rate": 6.695267942211757e-06, + "loss": 0.2167, + "step": 21166, + "teacher_loss": 0.18336230516433716 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.3432307839393616, + "learning_rate": 6.693376653592003e-06, + "loss": 0.2664, + "step": 21167, + "teacher_loss": 0.25784116983413696 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.6550067663192749, + "learning_rate": 6.691485555418224e-06, + "loss": 0.2076, + "step": 21168, + "teacher_loss": 0.1579257845878601 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.36990267038345337, + "learning_rate": 6.6895946477337635e-06, + "loss": 0.2064, + "step": 21169, + "teacher_loss": 0.18823090195655823 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.6400458812713623, + "learning_rate": 6.687703930581983e-06, + "loss": 0.2279, + "step": 21170, + "teacher_loss": 0.18209432065486908 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.8232654929161072, + "learning_rate": 6.685813404006235e-06, + "loss": 0.4007, + "step": 21171, + "teacher_loss": 0.35370147228240967 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.2232530415058136, + "learning_rate": 6.683923068049853e-06, + "loss": 0.1781, + "step": 21172, + "teacher_loss": 0.17303961515426636 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.5943315029144287, + "learning_rate": 6.682032922756188e-06, + "loss": 0.4468, + "step": 21173, + "teacher_loss": 0.43046078085899353 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.13391068577766418, + "learning_rate": 6.6801429681685644e-06, + "loss": 0.2278, + "step": 21174, + "teacher_loss": 0.23825660347938538 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.6444460153579712, + "learning_rate": 6.678253204330317e-06, + "loss": 0.2619, + "step": 21175, + "teacher_loss": 0.219389870762825 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.33574429154396057, + "learning_rate": 6.676363631284779e-06, + "loss": 0.2321, + "step": 21176, + "teacher_loss": 0.22062653303146362 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.5184218287467957, + "learning_rate": 6.674474249075263e-06, + "loss": 0.294, + "step": 21177, + "teacher_loss": 0.26907235383987427 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.2569783926010132, + "learning_rate": 6.672585057745097e-06, + "loss": 0.212, + "step": 21178, + "teacher_loss": 0.20696985721588135 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.4303276538848877, + "learning_rate": 6.670696057337583e-06, + "loss": 0.2021, + "step": 21179, + "teacher_loss": 0.1767246425151825 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.38043051958084106, + "learning_rate": 6.668807247896036e-06, + "loss": 0.3754, + "step": 21180, + "teacher_loss": 0.37483131885528564 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.19700020551681519, + "learning_rate": 6.666918629463764e-06, + "loss": 0.2035, + "step": 21181, + "teacher_loss": 0.2042715847492218 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.26751846075057983, + "learning_rate": 6.665030202084061e-06, + "loss": 0.1745, + "step": 21182, + "teacher_loss": 0.1641301065683365 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.311440110206604, + "learning_rate": 6.6631419658002245e-06, + "loss": 0.1908, + "step": 21183, + "teacher_loss": 0.1774301826953888 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.316212922334671, + "learning_rate": 6.661253920655554e-06, + "loss": 0.202, + "step": 21184, + "teacher_loss": 0.18932712078094482 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.29894915223121643, + "learning_rate": 6.659366066693324e-06, + "loss": 0.1777, + "step": 21185, + "teacher_loss": 0.16421669721603394 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.459450364112854, + "learning_rate": 6.657478403956831e-06, + "loss": 0.21, + "step": 21186, + "teacher_loss": 0.18226423859596252 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.4174155592918396, + "learning_rate": 6.6555909324893386e-06, + "loss": 0.2314, + "step": 21187, + "teacher_loss": 0.2107028365135193 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.24748720228672028, + "learning_rate": 6.653703652334128e-06, + "loss": 0.1958, + "step": 21188, + "teacher_loss": 0.19006532430648804 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.3583128750324249, + "learning_rate": 6.6518165635344766e-06, + "loss": 0.2353, + "step": 21189, + "teacher_loss": 0.22163251042366028 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.2770041525363922, + "learning_rate": 6.649929666133634e-06, + "loss": 0.2207, + "step": 21190, + "teacher_loss": 0.2144688218832016 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.5123680830001831, + "learning_rate": 6.648042960174871e-06, + "loss": 0.2118, + "step": 21191, + "teacher_loss": 0.178436741232872 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.6827540397644043, + "learning_rate": 6.646156445701448e-06, + "loss": 0.2334, + "step": 21192, + "teacher_loss": 0.18350112438201904 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.550858736038208, + "learning_rate": 6.644270122756605e-06, + "loss": 0.326, + "step": 21193, + "teacher_loss": 0.30100661516189575 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.4779033064842224, + "learning_rate": 6.6423839913835985e-06, + "loss": 0.2924, + "step": 21194, + "teacher_loss": 0.2718360424041748 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.2881973385810852, + "learning_rate": 6.640498051625673e-06, + "loss": 0.2144, + "step": 21195, + "teacher_loss": 0.2062150537967682 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.2724769115447998, + "learning_rate": 6.638612303526066e-06, + "loss": 0.2146, + "step": 21196, + "teacher_loss": 0.20814786851406097 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.31559669971466064, + "learning_rate": 6.636726747128003e-06, + "loss": 0.1691, + "step": 21197, + "teacher_loss": 0.15287527441978455 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.4456956386566162, + "learning_rate": 6.634841382474721e-06, + "loss": 0.2652, + "step": 21198, + "teacher_loss": 0.24512681365013123 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.3128053545951843, + "learning_rate": 6.632956209609452e-06, + "loss": 0.1949, + "step": 21199, + "teacher_loss": 0.1818285882472992 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.2124515324831009, + "learning_rate": 6.6310712285754074e-06, + "loss": 0.1407, + "step": 21200, + "teacher_loss": 0.13277366757392883 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.24811293184757233, + "learning_rate": 6.629186439415807e-06, + "loss": 0.132, + "step": 21201, + "teacher_loss": 0.11911018192768097 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.32954642176628113, + "learning_rate": 6.62730184217387e-06, + "loss": 0.1883, + "step": 21202, + "teacher_loss": 0.17257171869277954 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.47639939188957214, + "learning_rate": 6.625417436892794e-06, + "loss": 0.2801, + "step": 21203, + "teacher_loss": 0.25829243659973145 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.3459671437740326, + "learning_rate": 6.623533223615787e-06, + "loss": 0.1941, + "step": 21204, + "teacher_loss": 0.17720988392829895 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.4597497582435608, + "learning_rate": 6.6216492023860546e-06, + "loss": 0.255, + "step": 21205, + "teacher_loss": 0.23227080702781677 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.25717926025390625, + "learning_rate": 6.619765373246782e-06, + "loss": 0.2017, + "step": 21206, + "teacher_loss": 0.1955321580171585 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.29475924372673035, + "learning_rate": 6.6178817362411634e-06, + "loss": 0.1847, + "step": 21207, + "teacher_loss": 0.1724410355091095 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.4035297632217407, + "learning_rate": 6.615998291412391e-06, + "loss": 0.3622, + "step": 21208, + "teacher_loss": 0.3575735092163086 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.23649314045906067, + "learning_rate": 6.6141150388036425e-06, + "loss": 0.1641, + "step": 21209, + "teacher_loss": 0.15609237551689148 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.10772210359573364, + "learning_rate": 6.612231978458083e-06, + "loss": 0.1671, + "step": 21210, + "teacher_loss": 0.17372208833694458 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.6323534846305847, + "learning_rate": 6.610349110418907e-06, + "loss": 0.2256, + "step": 21211, + "teacher_loss": 0.18045005202293396 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.17334814369678497, + "learning_rate": 6.60846643472927e-06, + "loss": 0.1481, + "step": 21212, + "teacher_loss": 0.14526762068271637 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.4721274673938751, + "learning_rate": 6.606583951432336e-06, + "loss": 0.3164, + "step": 21213, + "teacher_loss": 0.2990991175174713 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.40152931213378906, + "learning_rate": 6.604701660571267e-06, + "loss": 0.2491, + "step": 21214, + "teacher_loss": 0.23215769231319427 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.6213292479515076, + "learning_rate": 6.602819562189223e-06, + "loss": 0.2539, + "step": 21215, + "teacher_loss": 0.213044673204422 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.581149160861969, + "learning_rate": 6.600937656329345e-06, + "loss": 0.4377, + "step": 21216, + "teacher_loss": 0.42180147767066956 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.7182506322860718, + "learning_rate": 6.599055943034786e-06, + "loss": 0.3285, + "step": 21217, + "teacher_loss": 0.2851560711860657 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.2575365900993347, + "learning_rate": 6.59717442234869e-06, + "loss": 0.192, + "step": 21218, + "teacher_loss": 0.18473955988883972 + }, + { + "compression_loss": 0.0, + "epoch": 3.83, + "label_loss": 0.3901729881763458, + "learning_rate": 6.595293094314194e-06, + "loss": 0.3165, + "step": 21219, + "teacher_loss": 0.3083469271659851 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.12353768944740295, + "learning_rate": 6.593411958974417e-06, + "loss": 0.156, + "step": 21220, + "teacher_loss": 0.15963146090507507 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.4487302899360657, + "learning_rate": 6.591531016372509e-06, + "loss": 0.1903, + "step": 21221, + "teacher_loss": 0.16162961721420288 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.7151828408241272, + "learning_rate": 6.5896502665515874e-06, + "loss": 0.2405, + "step": 21222, + "teacher_loss": 0.18773436546325684 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.39963942766189575, + "learning_rate": 6.5877697095547574e-06, + "loss": 0.2176, + "step": 21223, + "teacher_loss": 0.1973292976617813 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.8283702731132507, + "learning_rate": 6.585889345425158e-06, + "loss": 0.2375, + "step": 21224, + "teacher_loss": 0.17180094122886658 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.7733970880508423, + "learning_rate": 6.584009174205888e-06, + "loss": 0.2622, + "step": 21225, + "teacher_loss": 0.20539385080337524 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.25096195936203003, + "learning_rate": 6.582129195940051e-06, + "loss": 0.188, + "step": 21226, + "teacher_loss": 0.18101000785827637 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.3072476387023926, + "learning_rate": 6.580249410670753e-06, + "loss": 0.2049, + "step": 21227, + "teacher_loss": 0.19352814555168152 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.6021425724029541, + "learning_rate": 6.5783698184410975e-06, + "loss": 0.2364, + "step": 21228, + "teacher_loss": 0.19581303000450134 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.4362931251525879, + "learning_rate": 6.576490419294167e-06, + "loss": 0.2698, + "step": 21229, + "teacher_loss": 0.2512766122817993 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.48105841875076294, + "learning_rate": 6.574611213273056e-06, + "loss": 0.2737, + "step": 21230, + "teacher_loss": 0.25064852833747864 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.4490678906440735, + "learning_rate": 6.5727322004208565e-06, + "loss": 0.3315, + "step": 21231, + "teacher_loss": 0.3184799253940582 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.3275010287761688, + "learning_rate": 6.570853380780636e-06, + "loss": 0.2409, + "step": 21232, + "teacher_loss": 0.23130479454994202 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.32303979992866516, + "learning_rate": 6.568974754395473e-06, + "loss": 0.2022, + "step": 21233, + "teacher_loss": 0.18879126012325287 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.5101224184036255, + "learning_rate": 6.567096321308449e-06, + "loss": 0.2153, + "step": 21234, + "teacher_loss": 0.18253692984580994 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.2579573392868042, + "learning_rate": 6.565218081562616e-06, + "loss": 0.1492, + "step": 21235, + "teacher_loss": 0.1370660811662674 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.4701187312602997, + "learning_rate": 6.56334003520105e-06, + "loss": 0.2028, + "step": 21236, + "teacher_loss": 0.1731138825416565 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.7712534666061401, + "learning_rate": 6.561462182266797e-06, + "loss": 0.3855, + "step": 21237, + "teacher_loss": 0.342593252658844 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.3943137526512146, + "learning_rate": 6.559584522802917e-06, + "loss": 0.1945, + "step": 21238, + "teacher_loss": 0.17228448390960693 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.5542356967926025, + "learning_rate": 6.5577070568524625e-06, + "loss": 0.2757, + "step": 21239, + "teacher_loss": 0.24477559328079224 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.3369714021682739, + "learning_rate": 6.555829784458469e-06, + "loss": 0.2125, + "step": 21240, + "teacher_loss": 0.19869573414325714 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.46898338198661804, + "learning_rate": 6.553952705663987e-06, + "loss": 0.2675, + "step": 21241, + "teacher_loss": 0.24510252475738525 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.30435502529144287, + "learning_rate": 6.552075820512042e-06, + "loss": 0.1983, + "step": 21242, + "teacher_loss": 0.18647930026054382 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.31451988220214844, + "learning_rate": 6.55019912904567e-06, + "loss": 0.2599, + "step": 21243, + "teacher_loss": 0.2538384199142456 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.3133608400821686, + "learning_rate": 6.548322631307902e-06, + "loss": 0.2555, + "step": 21244, + "teacher_loss": 0.2490580528974533 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.17214983701705933, + "learning_rate": 6.546446327341752e-06, + "loss": 0.2, + "step": 21245, + "teacher_loss": 0.20313741266727448 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.474582701921463, + "learning_rate": 6.544570217190249e-06, + "loss": 0.2494, + "step": 21246, + "teacher_loss": 0.2243245542049408 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.38884395360946655, + "learning_rate": 6.542694300896395e-06, + "loss": 0.1991, + "step": 21247, + "teacher_loss": 0.17803409695625305 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.3393251895904541, + "learning_rate": 6.540818578503204e-06, + "loss": 0.1801, + "step": 21248, + "teacher_loss": 0.16244950890541077 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.31911084055900574, + "learning_rate": 6.538943050053685e-06, + "loss": 0.1826, + "step": 21249, + "teacher_loss": 0.16745352745056152 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.5622643828392029, + "learning_rate": 6.5370677155908296e-06, + "loss": 0.2686, + "step": 21250, + "teacher_loss": 0.23598787188529968 + }, + { + "epoch": 3.84, + "eval_exact_match": 79.93377483443709, + "eval_f1": 87.44578526535899, + "step": 21250 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.3693341612815857, + "learning_rate": 6.535192575157638e-06, + "loss": 0.2087, + "step": 21251, + "teacher_loss": 0.19081524014472961 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.3323872685432434, + "learning_rate": 6.5333176287971094e-06, + "loss": 0.1988, + "step": 21252, + "teacher_loss": 0.18392062187194824 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.46378836035728455, + "learning_rate": 6.531442876552216e-06, + "loss": 0.2246, + "step": 21253, + "teacher_loss": 0.1980430632829666 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.3981441259384155, + "learning_rate": 6.529568318465948e-06, + "loss": 0.2749, + "step": 21254, + "teacher_loss": 0.2611829936504364 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.8474220633506775, + "learning_rate": 6.527693954581288e-06, + "loss": 0.2616, + "step": 21255, + "teacher_loss": 0.19652634859085083 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.4249844253063202, + "learning_rate": 6.5258197849411985e-06, + "loss": 0.2097, + "step": 21256, + "teacher_loss": 0.1857403814792633 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.3462231755256653, + "learning_rate": 6.523945809588655e-06, + "loss": 0.2009, + "step": 21257, + "teacher_loss": 0.1847608983516693 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.38004302978515625, + "learning_rate": 6.522072028566626e-06, + "loss": 0.2491, + "step": 21258, + "teacher_loss": 0.2345041185617447 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.44807034730911255, + "learning_rate": 6.520198441918068e-06, + "loss": 0.2683, + "step": 21259, + "teacher_loss": 0.24829131364822388 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.41762250661849976, + "learning_rate": 6.518325049685931e-06, + "loss": 0.2699, + "step": 21260, + "teacher_loss": 0.25345468521118164 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.8911525011062622, + "learning_rate": 6.516451851913169e-06, + "loss": 0.301, + "step": 21261, + "teacher_loss": 0.23543407022953033 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.5937632918357849, + "learning_rate": 6.5145788486427385e-06, + "loss": 0.1986, + "step": 21262, + "teacher_loss": 0.15471762418746948 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.800538957118988, + "learning_rate": 6.5127060399175695e-06, + "loss": 0.2953, + "step": 21263, + "teacher_loss": 0.23913763463497162 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.20043110847473145, + "learning_rate": 6.510833425780604e-06, + "loss": 0.1748, + "step": 21264, + "teacher_loss": 0.17196637392044067 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.4386744499206543, + "learning_rate": 6.508961006274781e-06, + "loss": 0.2532, + "step": 21265, + "teacher_loss": 0.23259669542312622 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.5904057025909424, + "learning_rate": 6.50708878144302e-06, + "loss": 0.2666, + "step": 21266, + "teacher_loss": 0.23065067827701569 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.4290776550769806, + "learning_rate": 6.505216751328251e-06, + "loss": 0.26, + "step": 21267, + "teacher_loss": 0.241220623254776 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.2511195242404938, + "learning_rate": 6.503344915973396e-06, + "loss": 0.1958, + "step": 21268, + "teacher_loss": 0.1896679401397705 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.21728770434856415, + "learning_rate": 6.50147327542137e-06, + "loss": 0.2148, + "step": 21269, + "teacher_loss": 0.2145073413848877 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.7306472063064575, + "learning_rate": 6.4996018297150705e-06, + "loss": 0.2975, + "step": 21270, + "teacher_loss": 0.2493753433227539 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.35846054553985596, + "learning_rate": 6.4977305788974266e-06, + "loss": 0.2118, + "step": 21271, + "teacher_loss": 0.1955542117357254 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.4081936478614807, + "learning_rate": 6.49585952301133e-06, + "loss": 0.2623, + "step": 21272, + "teacher_loss": 0.24612005054950714 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.47310203313827515, + "learning_rate": 6.493988662099664e-06, + "loss": 0.316, + "step": 21273, + "teacher_loss": 0.298520565032959 + }, + { + "compression_loss": 0.0, + "epoch": 3.84, + "label_loss": 0.7038092613220215, + "learning_rate": 6.492117996205349e-06, + "loss": 0.2376, + "step": 21274, + "teacher_loss": 0.18574732542037964 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.3826104402542114, + "learning_rate": 6.490247525371259e-06, + "loss": 0.2746, + "step": 21275, + "teacher_loss": 0.26265203952789307 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.9430464506149292, + "learning_rate": 6.488377249640277e-06, + "loss": 0.2187, + "step": 21276, + "teacher_loss": 0.13820284605026245 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.7118151187896729, + "learning_rate": 6.486507169055283e-06, + "loss": 0.2432, + "step": 21277, + "teacher_loss": 0.19114740192890167 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 1.2927591800689697, + "learning_rate": 6.4846372836591614e-06, + "loss": 0.5825, + "step": 21278, + "teacher_loss": 0.5035779476165771 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.25623345375061035, + "learning_rate": 6.48276759349477e-06, + "loss": 0.1681, + "step": 21279, + "teacher_loss": 0.15832293033599854 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.4538447856903076, + "learning_rate": 6.480898098604982e-06, + "loss": 0.2756, + "step": 21280, + "teacher_loss": 0.2558346092700958 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.44045794010162354, + "learning_rate": 6.479028799032664e-06, + "loss": 0.2403, + "step": 21281, + "teacher_loss": 0.21802747249603271 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.29872065782546997, + "learning_rate": 6.47715969482067e-06, + "loss": 0.2826, + "step": 21282, + "teacher_loss": 0.28076332807540894 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.5400710105895996, + "learning_rate": 6.47529078601184e-06, + "loss": 0.2674, + "step": 21283, + "teacher_loss": 0.23709625005722046 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.8857929706573486, + "learning_rate": 6.473422072649044e-06, + "loss": 0.3003, + "step": 21284, + "teacher_loss": 0.2352481335401535 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.2900814414024353, + "learning_rate": 6.471553554775116e-06, + "loss": 0.1892, + "step": 21285, + "teacher_loss": 0.17800059914588928 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.7973989844322205, + "learning_rate": 6.469685232432891e-06, + "loss": 0.3332, + "step": 21286, + "teacher_loss": 0.281582236289978 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.7149227857589722, + "learning_rate": 6.467817105665207e-06, + "loss": 0.2395, + "step": 21287, + "teacher_loss": 0.18672248721122742 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.7514445781707764, + "learning_rate": 6.465949174514902e-06, + "loss": 0.4698, + "step": 21288, + "teacher_loss": 0.43846404552459717 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 1.015103816986084, + "learning_rate": 6.464081439024792e-06, + "loss": 0.2986, + "step": 21289, + "teacher_loss": 0.21903246641159058 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.4348888397216797, + "learning_rate": 6.4622138992377e-06, + "loss": 0.2618, + "step": 21290, + "teacher_loss": 0.24254050850868225 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.17433705925941467, + "learning_rate": 6.460346555196453e-06, + "loss": 0.1432, + "step": 21291, + "teacher_loss": 0.13970379531383514 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.2623525559902191, + "learning_rate": 6.45847940694385e-06, + "loss": 0.1488, + "step": 21292, + "teacher_loss": 0.13616153597831726 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.6825239658355713, + "learning_rate": 6.4566124545227055e-06, + "loss": 0.2716, + "step": 21293, + "teacher_loss": 0.22592885792255402 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.6175816655158997, + "learning_rate": 6.454745697975829e-06, + "loss": 0.3044, + "step": 21294, + "teacher_loss": 0.26958343386650085 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.247538223862648, + "learning_rate": 6.4528791373460076e-06, + "loss": 0.2223, + "step": 21295, + "teacher_loss": 0.21953842043876648 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.6099358797073364, + "learning_rate": 6.451012772676047e-06, + "loss": 0.2547, + "step": 21296, + "teacher_loss": 0.2151889204978943 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.26875975728034973, + "learning_rate": 6.449146604008727e-06, + "loss": 0.1818, + "step": 21297, + "teacher_loss": 0.17213866114616394 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.42377060651779175, + "learning_rate": 6.44728063138684e-06, + "loss": 0.1877, + "step": 21298, + "teacher_loss": 0.16150692105293274 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.45346811413764954, + "learning_rate": 6.44541485485317e-06, + "loss": 0.2364, + "step": 21299, + "teacher_loss": 0.21231895685195923 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.7622081637382507, + "learning_rate": 6.443549274450485e-06, + "loss": 0.2955, + "step": 21300, + "teacher_loss": 0.24369731545448303 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.7685917615890503, + "learning_rate": 6.44168389022156e-06, + "loss": 0.2936, + "step": 21301, + "teacher_loss": 0.24083341658115387 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.4388599097728729, + "learning_rate": 6.439818702209171e-06, + "loss": 0.2334, + "step": 21302, + "teacher_loss": 0.2106252908706665 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.6749705076217651, + "learning_rate": 6.437953710456068e-06, + "loss": 0.297, + "step": 21303, + "teacher_loss": 0.25495949387550354 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.34914135932922363, + "learning_rate": 6.436088915005021e-06, + "loss": 0.1789, + "step": 21304, + "teacher_loss": 0.15993613004684448 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.296528160572052, + "learning_rate": 6.434224315898775e-06, + "loss": 0.1575, + "step": 21305, + "teacher_loss": 0.14201316237449646 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.392334908246994, + "learning_rate": 6.432359913180084e-06, + "loss": 0.2573, + "step": 21306, + "teacher_loss": 0.24233193695545197 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.6466615796089172, + "learning_rate": 6.430495706891698e-06, + "loss": 0.2676, + "step": 21307, + "teacher_loss": 0.22547055780887604 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.3968576490879059, + "learning_rate": 6.428631697076347e-06, + "loss": 0.2107, + "step": 21308, + "teacher_loss": 0.18996086716651917 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.21296623349189758, + "learning_rate": 6.42676788377678e-06, + "loss": 0.1977, + "step": 21309, + "teacher_loss": 0.19597885012626648 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.25313711166381836, + "learning_rate": 6.424904267035715e-06, + "loss": 0.19, + "step": 21310, + "teacher_loss": 0.18298307061195374 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.3535030484199524, + "learning_rate": 6.423040846895884e-06, + "loss": 0.1931, + "step": 21311, + "teacher_loss": 0.17532047629356384 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.4789242446422577, + "learning_rate": 6.421177623400019e-06, + "loss": 0.293, + "step": 21312, + "teacher_loss": 0.272366464138031 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.3339740037918091, + "learning_rate": 6.419314596590824e-06, + "loss": 0.184, + "step": 21313, + "teacher_loss": 0.16729968786239624 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.5131508111953735, + "learning_rate": 6.41745176651102e-06, + "loss": 0.2186, + "step": 21314, + "teacher_loss": 0.18590402603149414 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.3310840129852295, + "learning_rate": 6.4155891332033196e-06, + "loss": 0.2974, + "step": 21315, + "teacher_loss": 0.29366785287857056 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.5613046884536743, + "learning_rate": 6.413726696710419e-06, + "loss": 0.249, + "step": 21316, + "teacher_loss": 0.21428847312927246 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.38214248418807983, + "learning_rate": 6.41186445707502e-06, + "loss": 0.2635, + "step": 21317, + "teacher_loss": 0.25026655197143555 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.5044153332710266, + "learning_rate": 6.410002414339826e-06, + "loss": 0.3077, + "step": 21318, + "teacher_loss": 0.2858063578605652 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.5468268990516663, + "learning_rate": 6.408140568547524e-06, + "loss": 0.2552, + "step": 21319, + "teacher_loss": 0.22278521955013275 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.6593981981277466, + "learning_rate": 6.406278919740788e-06, + "loss": 0.2298, + "step": 21320, + "teacher_loss": 0.182090163230896 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.3173362910747528, + "learning_rate": 6.404417467962322e-06, + "loss": 0.1603, + "step": 21321, + "teacher_loss": 0.14279726147651672 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.23680120706558228, + "learning_rate": 6.402556213254792e-06, + "loss": 0.1861, + "step": 21322, + "teacher_loss": 0.1804875135421753 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.8890953660011292, + "learning_rate": 6.400695155660866e-06, + "loss": 0.2989, + "step": 21323, + "teacher_loss": 0.23326990008354187 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.4791303277015686, + "learning_rate": 6.3988342952232195e-06, + "loss": 0.2918, + "step": 21324, + "teacher_loss": 0.27100297808647156 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.3956109881401062, + "learning_rate": 6.396973631984519e-06, + "loss": 0.2394, + "step": 21325, + "teacher_loss": 0.22199493646621704 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.37856486439704895, + "learning_rate": 6.3951131659874145e-06, + "loss": 0.2143, + "step": 21326, + "teacher_loss": 0.1960412859916687 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.2856766879558563, + "learning_rate": 6.393252897274567e-06, + "loss": 0.1705, + "step": 21327, + "teacher_loss": 0.15773595869541168 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.4091029763221741, + "learning_rate": 6.391392825888632e-06, + "loss": 0.2519, + "step": 21328, + "teacher_loss": 0.23444359004497528 + }, + { + "compression_loss": 0.0, + "epoch": 3.85, + "label_loss": 0.3835133910179138, + "learning_rate": 6.389532951872242e-06, + "loss": 0.2369, + "step": 21329, + "teacher_loss": 0.22056493163108826 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.49710577726364136, + "learning_rate": 6.387673275268048e-06, + "loss": 0.3325, + "step": 21330, + "teacher_loss": 0.31423354148864746 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.6124380826950073, + "learning_rate": 6.385813796118689e-06, + "loss": 0.3479, + "step": 21331, + "teacher_loss": 0.31848978996276855 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.3856092095375061, + "learning_rate": 6.383954514466793e-06, + "loss": 0.2487, + "step": 21332, + "teacher_loss": 0.23353232443332672 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.270458459854126, + "learning_rate": 6.382095430354978e-06, + "loss": 0.1988, + "step": 21333, + "teacher_loss": 0.19085386395454407 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.4799838960170746, + "learning_rate": 6.380236543825887e-06, + "loss": 0.2807, + "step": 21334, + "teacher_loss": 0.2585732936859131 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.6220208406448364, + "learning_rate": 6.3783778549221285e-06, + "loss": 0.2217, + "step": 21335, + "teacher_loss": 0.17724472284317017 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.45883625745773315, + "learning_rate": 6.376519363686312e-06, + "loss": 0.2282, + "step": 21336, + "teacher_loss": 0.20257119834423065 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.2728968858718872, + "learning_rate": 6.374661070161051e-06, + "loss": 0.1736, + "step": 21337, + "teacher_loss": 0.1626073569059372 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.41584211587905884, + "learning_rate": 6.3728029743889586e-06, + "loss": 0.2366, + "step": 21338, + "teacher_loss": 0.21667324006557465 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.4721145033836365, + "learning_rate": 6.370945076412622e-06, + "loss": 0.2967, + "step": 21339, + "teacher_loss": 0.2772143483161926 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.44651615619659424, + "learning_rate": 6.3690873762746425e-06, + "loss": 0.2668, + "step": 21340, + "teacher_loss": 0.24687179923057556 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.37927573919296265, + "learning_rate": 6.3672298740176195e-06, + "loss": 0.2322, + "step": 21341, + "teacher_loss": 0.2158384472131729 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.654780924320221, + "learning_rate": 6.365372569684126e-06, + "loss": 0.2321, + "step": 21342, + "teacher_loss": 0.18510353565216064 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.3908180892467499, + "learning_rate": 6.363515463316752e-06, + "loss": 0.1835, + "step": 21343, + "teacher_loss": 0.16049689054489136 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.3326825499534607, + "learning_rate": 6.361658554958082e-06, + "loss": 0.2838, + "step": 21344, + "teacher_loss": 0.27838772535324097 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.16537687182426453, + "learning_rate": 6.359801844650681e-06, + "loss": 0.1517, + "step": 21345, + "teacher_loss": 0.15020406246185303 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.16807019710540771, + "learning_rate": 6.357945332437108e-06, + "loss": 0.1459, + "step": 21346, + "teacher_loss": 0.14348533749580383 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.7297033071517944, + "learning_rate": 6.35608901835995e-06, + "loss": 0.3378, + "step": 21347, + "teacher_loss": 0.29423487186431885 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.8914479613304138, + "learning_rate": 6.354232902461754e-06, + "loss": 0.2454, + "step": 21348, + "teacher_loss": 0.17363935708999634 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.350069522857666, + "learning_rate": 6.352376984785072e-06, + "loss": 0.1419, + "step": 21349, + "teacher_loss": 0.11879783868789673 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.30284547805786133, + "learning_rate": 6.350521265372458e-06, + "loss": 0.1861, + "step": 21350, + "teacher_loss": 0.1731119453907013 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.5110034346580505, + "learning_rate": 6.348665744266466e-06, + "loss": 0.3736, + "step": 21351, + "teacher_loss": 0.35834556818008423 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.42597055435180664, + "learning_rate": 6.346810421509623e-06, + "loss": 0.3195, + "step": 21352, + "teacher_loss": 0.3076527714729309 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.7577675580978394, + "learning_rate": 6.3449552971444735e-06, + "loss": 0.2453, + "step": 21353, + "teacher_loss": 0.18836981058120728 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.24229231476783752, + "learning_rate": 6.343100371213555e-06, + "loss": 0.1843, + "step": 21354, + "teacher_loss": 0.17787739634513855 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.8748788237571716, + "learning_rate": 6.341245643759386e-06, + "loss": 0.3982, + "step": 21355, + "teacher_loss": 0.34520116448402405 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.6479318737983704, + "learning_rate": 6.339391114824494e-06, + "loss": 0.275, + "step": 21356, + "teacher_loss": 0.23356686532497406 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.34384024143218994, + "learning_rate": 6.337536784451403e-06, + "loss": 0.1711, + "step": 21357, + "teacher_loss": 0.15194770693778992 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.5623119473457336, + "learning_rate": 6.335682652682616e-06, + "loss": 0.1986, + "step": 21358, + "teacher_loss": 0.1581922471523285 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.502917468547821, + "learning_rate": 6.333828719560655e-06, + "loss": 0.1933, + "step": 21359, + "teacher_loss": 0.15886220335960388 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.2588789165019989, + "learning_rate": 6.331974985128014e-06, + "loss": 0.1729, + "step": 21360, + "teacher_loss": 0.16331566870212555 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.5945191383361816, + "learning_rate": 6.330121449427198e-06, + "loss": 0.2449, + "step": 21361, + "teacher_loss": 0.2061018943786621 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.3617771565914154, + "learning_rate": 6.328268112500709e-06, + "loss": 0.1848, + "step": 21362, + "teacher_loss": 0.16519123315811157 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.7821840047836304, + "learning_rate": 6.326414974391026e-06, + "loss": 0.2769, + "step": 21363, + "teacher_loss": 0.22077956795692444 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.43017128109931946, + "learning_rate": 6.324562035140651e-06, + "loss": 0.1804, + "step": 21364, + "teacher_loss": 0.1526479870080948 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.3873622715473175, + "learning_rate": 6.322709294792051e-06, + "loss": 0.184, + "step": 21365, + "teacher_loss": 0.16139712929725647 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.5269544720649719, + "learning_rate": 6.320856753387712e-06, + "loss": 0.2897, + "step": 21366, + "teacher_loss": 0.2633237838745117 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.25156816840171814, + "learning_rate": 6.319004410970112e-06, + "loss": 0.1453, + "step": 21367, + "teacher_loss": 0.13350479304790497 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.7302528023719788, + "learning_rate": 6.317152267581706e-06, + "loss": 0.6018, + "step": 21368, + "teacher_loss": 0.5875493288040161 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.3036249279975891, + "learning_rate": 6.315300323264974e-06, + "loss": 0.1993, + "step": 21369, + "teacher_loss": 0.18773885071277618 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.45646196603775024, + "learning_rate": 6.31344857806236e-06, + "loss": 0.199, + "step": 21370, + "teacher_loss": 0.17034044861793518 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.5490099191665649, + "learning_rate": 6.311597032016328e-06, + "loss": 0.2668, + "step": 21371, + "teacher_loss": 0.23541852831840515 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.43646240234375, + "learning_rate": 6.3097456851693316e-06, + "loss": 0.21, + "step": 21372, + "teacher_loss": 0.18478283286094666 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.7461293935775757, + "learning_rate": 6.307894537563807e-06, + "loss": 0.2903, + "step": 21373, + "teacher_loss": 0.23969972133636475 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.7128242254257202, + "learning_rate": 6.3060435892422005e-06, + "loss": 0.2532, + "step": 21374, + "teacher_loss": 0.20215964317321777 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.26486867666244507, + "learning_rate": 6.3041928402469525e-06, + "loss": 0.2536, + "step": 21375, + "teacher_loss": 0.25234732031822205 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.2693418860435486, + "learning_rate": 6.302342290620488e-06, + "loss": 0.2109, + "step": 21376, + "teacher_loss": 0.2043648064136505 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.2843697667121887, + "learning_rate": 6.3004919404052376e-06, + "loss": 0.2217, + "step": 21377, + "teacher_loss": 0.2147739827632904 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.5605969429016113, + "learning_rate": 6.298641789643629e-06, + "loss": 0.3499, + "step": 21378, + "teacher_loss": 0.3265239894390106 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.20030370354652405, + "learning_rate": 6.29679183837807e-06, + "loss": 0.1543, + "step": 21379, + "teacher_loss": 0.1492004096508026 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.4723861813545227, + "learning_rate": 6.294942086650981e-06, + "loss": 0.235, + "step": 21380, + "teacher_loss": 0.20859014987945557 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.6509625315666199, + "learning_rate": 6.2930925345047784e-06, + "loss": 0.2998, + "step": 21381, + "teacher_loss": 0.2607758045196533 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.23299269378185272, + "learning_rate": 6.291243181981857e-06, + "loss": 0.2184, + "step": 21382, + "teacher_loss": 0.21673761308193207 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.10066620260477066, + "learning_rate": 6.289394029124608e-06, + "loss": 0.2047, + "step": 21383, + "teacher_loss": 0.2163110375404358 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.5919342041015625, + "learning_rate": 6.287545075975451e-06, + "loss": 0.2081, + "step": 21384, + "teacher_loss": 0.16545751690864563 + }, + { + "compression_loss": 0.0, + "epoch": 3.86, + "label_loss": 0.6712451577186584, + "learning_rate": 6.285696322576764e-06, + "loss": 0.2244, + "step": 21385, + "teacher_loss": 0.17476502060890198 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.34813517332077026, + "learning_rate": 6.283847768970927e-06, + "loss": 0.1884, + "step": 21386, + "teacher_loss": 0.17064355313777924 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.23295201361179352, + "learning_rate": 6.281999415200329e-06, + "loss": 0.231, + "step": 21387, + "teacher_loss": 0.23081165552139282 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.34332409501075745, + "learning_rate": 6.280151261307351e-06, + "loss": 0.2099, + "step": 21388, + "teacher_loss": 0.19511368870735168 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.4440908133983612, + "learning_rate": 6.278303307334357e-06, + "loss": 0.1991, + "step": 21389, + "teacher_loss": 0.1718633770942688 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.24108481407165527, + "learning_rate": 6.276455553323717e-06, + "loss": 0.1692, + "step": 21390, + "teacher_loss": 0.16126412153244019 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.43409717082977295, + "learning_rate": 6.274607999317804e-06, + "loss": 0.2205, + "step": 21391, + "teacher_loss": 0.19671514630317688 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.5630717277526855, + "learning_rate": 6.27276064535897e-06, + "loss": 0.229, + "step": 21392, + "teacher_loss": 0.19190245866775513 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.24490173161029816, + "learning_rate": 6.270913491489556e-06, + "loss": 0.2057, + "step": 21393, + "teacher_loss": 0.2013475000858307 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.4531782865524292, + "learning_rate": 6.269066537751936e-06, + "loss": 0.5419, + "step": 21394, + "teacher_loss": 0.5517560243606567 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.2280678004026413, + "learning_rate": 6.267219784188443e-06, + "loss": 0.1915, + "step": 21395, + "teacher_loss": 0.18748915195465088 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.9340258836746216, + "learning_rate": 6.265373230841409e-06, + "loss": 0.2911, + "step": 21396, + "teacher_loss": 0.21962030231952667 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.5182957649230957, + "learning_rate": 6.2635268777531896e-06, + "loss": 0.4731, + "step": 21397, + "teacher_loss": 0.4681251049041748 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.641831636428833, + "learning_rate": 6.261680724966104e-06, + "loss": 0.2431, + "step": 21398, + "teacher_loss": 0.1987903118133545 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.3692972660064697, + "learning_rate": 6.259834772522477e-06, + "loss": 0.1932, + "step": 21399, + "teacher_loss": 0.17364542186260223 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.32787269353866577, + "learning_rate": 6.257989020464633e-06, + "loss": 0.1981, + "step": 21400, + "teacher_loss": 0.18366721272468567 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.2867777943611145, + "learning_rate": 6.2561434688348965e-06, + "loss": 0.2023, + "step": 21401, + "teacher_loss": 0.19293692708015442 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.3665568232536316, + "learning_rate": 6.25429811767557e-06, + "loss": 0.2142, + "step": 21402, + "teacher_loss": 0.19722774624824524 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.6526110768318176, + "learning_rate": 6.252452967028965e-06, + "loss": 0.3142, + "step": 21403, + "teacher_loss": 0.27665427327156067 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.16048680245876312, + "learning_rate": 6.250608016937394e-06, + "loss": 0.1532, + "step": 21404, + "teacher_loss": 0.15236012637615204 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.41316312551498413, + "learning_rate": 6.248763267443144e-06, + "loss": 0.2407, + "step": 21405, + "teacher_loss": 0.22158949077129364 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.49262863397598267, + "learning_rate": 6.246918718588513e-06, + "loss": 0.2785, + "step": 21406, + "teacher_loss": 0.2547357678413391 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.5956610441207886, + "learning_rate": 6.245074370415799e-06, + "loss": 0.2521, + "step": 21407, + "teacher_loss": 0.213896244764328 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.3322790563106537, + "learning_rate": 6.243230222967282e-06, + "loss": 0.2078, + "step": 21408, + "teacher_loss": 0.19392454624176025 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.5611691474914551, + "learning_rate": 6.241386276285236e-06, + "loss": 0.274, + "step": 21409, + "teacher_loss": 0.24212554097175598 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.7483846545219421, + "learning_rate": 6.239542530411941e-06, + "loss": 0.2189, + "step": 21410, + "teacher_loss": 0.16011865437030792 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.5295722484588623, + "learning_rate": 6.237698985389678e-06, + "loss": 0.2254, + "step": 21411, + "teacher_loss": 0.19156429171562195 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.3596239686012268, + "learning_rate": 6.2358556412607e-06, + "loss": 0.1845, + "step": 21412, + "teacher_loss": 0.16499629616737366 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.10400324314832687, + "learning_rate": 6.2340124980672775e-06, + "loss": 0.129, + "step": 21413, + "teacher_loss": 0.13176730275154114 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.3605911433696747, + "learning_rate": 6.2321695558516705e-06, + "loss": 0.163, + "step": 21414, + "teacher_loss": 0.1410941481590271 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.21043020486831665, + "learning_rate": 6.230326814656124e-06, + "loss": 0.1989, + "step": 21415, + "teacher_loss": 0.19761121273040771 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.5109516382217407, + "learning_rate": 6.22848427452289e-06, + "loss": 0.2639, + "step": 21416, + "teacher_loss": 0.23645088076591492 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.808468222618103, + "learning_rate": 6.226641935494218e-06, + "loss": 0.3006, + "step": 21417, + "teacher_loss": 0.24420185387134552 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.48544827103614807, + "learning_rate": 6.22479979761234e-06, + "loss": 0.2029, + "step": 21418, + "teacher_loss": 0.17149657011032104 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.5281727313995361, + "learning_rate": 6.222957860919496e-06, + "loss": 0.2252, + "step": 21419, + "teacher_loss": 0.19158722460269928 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.4462193250656128, + "learning_rate": 6.221116125457907e-06, + "loss": 0.2328, + "step": 21420, + "teacher_loss": 0.20905530452728271 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.17726922035217285, + "learning_rate": 6.219274591269807e-06, + "loss": 0.1494, + "step": 21421, + "teacher_loss": 0.14630496501922607 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.35523146390914917, + "learning_rate": 6.217433258397419e-06, + "loss": 0.1905, + "step": 21422, + "teacher_loss": 0.17218877375125885 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.45175766944885254, + "learning_rate": 6.21559212688295e-06, + "loss": 0.2733, + "step": 21423, + "teacher_loss": 0.2534489035606384 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.41182851791381836, + "learning_rate": 6.2137511967686164e-06, + "loss": 0.2058, + "step": 21424, + "teacher_loss": 0.18291160464286804 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.3489312529563904, + "learning_rate": 6.211910468096631e-06, + "loss": 0.1691, + "step": 21425, + "teacher_loss": 0.14910775423049927 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.5681257247924805, + "learning_rate": 6.210069940909185e-06, + "loss": 0.2148, + "step": 21426, + "teacher_loss": 0.17553642392158508 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.5742425918579102, + "learning_rate": 6.208229615248486e-06, + "loss": 0.2861, + "step": 21427, + "teacher_loss": 0.25411492586135864 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.19894939661026, + "learning_rate": 6.2063894911567185e-06, + "loss": 0.1735, + "step": 21428, + "teacher_loss": 0.17070996761322021 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.2264716625213623, + "learning_rate": 6.204549568676075e-06, + "loss": 0.1985, + "step": 21429, + "teacher_loss": 0.19544637203216553 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.7795950174331665, + "learning_rate": 6.202709847848746e-06, + "loss": 0.26, + "step": 21430, + "teacher_loss": 0.20230337977409363 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.4625800848007202, + "learning_rate": 6.2008703287168975e-06, + "loss": 0.2143, + "step": 21431, + "teacher_loss": 0.18675881624221802 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.25561806559562683, + "learning_rate": 6.199031011322718e-06, + "loss": 0.188, + "step": 21432, + "teacher_loss": 0.18051858246326447 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.29560545086860657, + "learning_rate": 6.197191895708364e-06, + "loss": 0.1676, + "step": 21433, + "teacher_loss": 0.15337711572647095 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.5510247945785522, + "learning_rate": 6.195352981916009e-06, + "loss": 0.2682, + "step": 21434, + "teacher_loss": 0.23672299087047577 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.36915671825408936, + "learning_rate": 6.1935142699878175e-06, + "loss": 0.2312, + "step": 21435, + "teacher_loss": 0.2158956527709961 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.4329996109008789, + "learning_rate": 6.191675759965935e-06, + "loss": 0.2449, + "step": 21436, + "teacher_loss": 0.22402054071426392 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.3481356203556061, + "learning_rate": 6.189837451892519e-06, + "loss": 0.1868, + "step": 21437, + "teacher_loss": 0.16886171698570251 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.3584153652191162, + "learning_rate": 6.1879993458097215e-06, + "loss": 0.1898, + "step": 21438, + "teacher_loss": 0.17111417651176453 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.5717709064483643, + "learning_rate": 6.186161441759672e-06, + "loss": 0.2785, + "step": 21439, + "teacher_loss": 0.24594587087631226 + }, + { + "compression_loss": 0.0, + "epoch": 3.87, + "label_loss": 0.6559672355651855, + "learning_rate": 6.184323739784517e-06, + "loss": 0.3762, + "step": 21440, + "teacher_loss": 0.34516775608062744 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.1212519109249115, + "learning_rate": 6.182486239926393e-06, + "loss": 0.1736, + "step": 21441, + "teacher_loss": 0.17938290536403656 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.16818839311599731, + "learning_rate": 6.180648942227424e-06, + "loss": 0.1602, + "step": 21442, + "teacher_loss": 0.1592603325843811 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.42899173498153687, + "learning_rate": 6.17881184672972e-06, + "loss": 0.2292, + "step": 21443, + "teacher_loss": 0.20702405273914337 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.274258553981781, + "learning_rate": 6.1769749534754265e-06, + "loss": 0.2151, + "step": 21444, + "teacher_loss": 0.20849429070949554 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.2547885477542877, + "learning_rate": 6.175138262506643e-06, + "loss": 0.1497, + "step": 21445, + "teacher_loss": 0.13806220889091492 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.18894247710704803, + "learning_rate": 6.17330177386547e-06, + "loss": 0.2079, + "step": 21446, + "teacher_loss": 0.21003150939941406 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.48209500312805176, + "learning_rate": 6.171465487594035e-06, + "loss": 0.2056, + "step": 21447, + "teacher_loss": 0.17487266659736633 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.09564375132322311, + "learning_rate": 6.169629403734426e-06, + "loss": 0.203, + "step": 21448, + "teacher_loss": 0.21490024030208588 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.7101892232894897, + "learning_rate": 6.1677935223287365e-06, + "loss": 0.3467, + "step": 21449, + "teacher_loss": 0.3063589930534363 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.7390053868293762, + "learning_rate": 6.16595784341906e-06, + "loss": 0.4626, + "step": 21450, + "teacher_loss": 0.43187761306762695 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.6143171787261963, + "learning_rate": 6.164122367047489e-06, + "loss": 0.2819, + "step": 21451, + "teacher_loss": 0.24492789804935455 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.27114546298980713, + "learning_rate": 6.162287093256096e-06, + "loss": 0.1589, + "step": 21452, + "teacher_loss": 0.14645709097385406 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.24327969551086426, + "learning_rate": 6.160452022086962e-06, + "loss": 0.2065, + "step": 21453, + "teacher_loss": 0.20244361460208893 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.7598296999931335, + "learning_rate": 6.158617153582169e-06, + "loss": 0.476, + "step": 21454, + "teacher_loss": 0.4445103406906128 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.3083192706108093, + "learning_rate": 6.156782487783775e-06, + "loss": 0.1791, + "step": 21455, + "teacher_loss": 0.16478794813156128 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.16763943433761597, + "learning_rate": 6.1549480247338345e-06, + "loss": 0.2516, + "step": 21456, + "teacher_loss": 0.260894775390625 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.26820459961891174, + "learning_rate": 6.153113764474428e-06, + "loss": 0.1483, + "step": 21457, + "teacher_loss": 0.13497145473957062 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.08898498862981796, + "learning_rate": 6.151279707047597e-06, + "loss": 0.1167, + "step": 21458, + "teacher_loss": 0.11980529129505157 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.8061720728874207, + "learning_rate": 6.149445852495389e-06, + "loss": 0.2652, + "step": 21459, + "teacher_loss": 0.20512481033802032 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.4401913583278656, + "learning_rate": 6.147612200859852e-06, + "loss": 0.2096, + "step": 21460, + "teacher_loss": 0.1840098798274994 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.5051714777946472, + "learning_rate": 6.145778752183031e-06, + "loss": 0.2232, + "step": 21461, + "teacher_loss": 0.1918383538722992 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.4408552050590515, + "learning_rate": 6.143945506506952e-06, + "loss": 0.1817, + "step": 21462, + "teacher_loss": 0.15292873978614807 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.6029576659202576, + "learning_rate": 6.14211246387365e-06, + "loss": 0.2337, + "step": 21463, + "teacher_loss": 0.19271501898765564 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.20352861285209656, + "learning_rate": 6.14027962432516e-06, + "loss": 0.2357, + "step": 21464, + "teacher_loss": 0.2392827570438385 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 1.1519109010696411, + "learning_rate": 6.1384469879034875e-06, + "loss": 0.3807, + "step": 21465, + "teacher_loss": 0.29503172636032104 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.7840806245803833, + "learning_rate": 6.1366145546506576e-06, + "loss": 0.406, + "step": 21466, + "teacher_loss": 0.36403924226760864 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.9405517578125, + "learning_rate": 6.134782324608687e-06, + "loss": 0.2789, + "step": 21467, + "teacher_loss": 0.2053447663784027 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.9230841398239136, + "learning_rate": 6.13295029781958e-06, + "loss": 0.497, + "step": 21468, + "teacher_loss": 0.44969502091407776 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.23191297054290771, + "learning_rate": 6.131118474325326e-06, + "loss": 0.2306, + "step": 21469, + "teacher_loss": 0.2304980754852295 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.6510720252990723, + "learning_rate": 6.129286854167945e-06, + "loss": 0.2657, + "step": 21470, + "teacher_loss": 0.22289106249809265 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.44027066230773926, + "learning_rate": 6.127455437389421e-06, + "loss": 0.2931, + "step": 21471, + "teacher_loss": 0.2767890691757202 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.27611273527145386, + "learning_rate": 6.125624224031739e-06, + "loss": 0.158, + "step": 21472, + "teacher_loss": 0.14486472308635712 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.249456524848938, + "learning_rate": 6.1237932141368855e-06, + "loss": 0.2192, + "step": 21473, + "teacher_loss": 0.21579775214195251 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.19588732719421387, + "learning_rate": 6.1219624077468464e-06, + "loss": 0.1868, + "step": 21474, + "teacher_loss": 0.18575289845466614 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.44268372654914856, + "learning_rate": 6.120131804903587e-06, + "loss": 0.2231, + "step": 21475, + "teacher_loss": 0.1986725628376007 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.8620580434799194, + "learning_rate": 6.118301405649082e-06, + "loss": 0.4099, + "step": 21476, + "teacher_loss": 0.35969996452331543 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.39401498436927795, + "learning_rate": 6.116471210025302e-06, + "loss": 0.2492, + "step": 21477, + "teacher_loss": 0.2331630140542984 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.5513173341751099, + "learning_rate": 6.114641218074199e-06, + "loss": 0.4222, + "step": 21478, + "teacher_loss": 0.40780752897262573 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.39853644371032715, + "learning_rate": 6.112811429837732e-06, + "loss": 0.2557, + "step": 21479, + "teacher_loss": 0.23978257179260254 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.1522049605846405, + "learning_rate": 6.110981845357858e-06, + "loss": 0.1726, + "step": 21480, + "teacher_loss": 0.17486245930194855 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.7479170560836792, + "learning_rate": 6.109152464676515e-06, + "loss": 0.2199, + "step": 21481, + "teacher_loss": 0.1611787974834442 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.23468512296676636, + "learning_rate": 6.107323287835656e-06, + "loss": 0.1747, + "step": 21482, + "teacher_loss": 0.16800400614738464 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.6933165192604065, + "learning_rate": 6.1054943148772065e-06, + "loss": 0.3515, + "step": 21483, + "teacher_loss": 0.31354331970214844 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.5207126140594482, + "learning_rate": 6.1036655458431065e-06, + "loss": 0.2861, + "step": 21484, + "teacher_loss": 0.2600601315498352 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.3583377003669739, + "learning_rate": 6.101836980775286e-06, + "loss": 0.2024, + "step": 21485, + "teacher_loss": 0.1850317120552063 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.3899415135383606, + "learning_rate": 6.100008619715661e-06, + "loss": 0.2073, + "step": 21486, + "teacher_loss": 0.1869933009147644 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.621436595916748, + "learning_rate": 6.098180462706155e-06, + "loss": 0.4256, + "step": 21487, + "teacher_loss": 0.4038439393043518 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.5359862446784973, + "learning_rate": 6.096352509788688e-06, + "loss": 0.2321, + "step": 21488, + "teacher_loss": 0.1983642280101776 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.6086143851280212, + "learning_rate": 6.094524761005156e-06, + "loss": 0.238, + "step": 21489, + "teacher_loss": 0.19681468605995178 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.5277094841003418, + "learning_rate": 6.092697216397478e-06, + "loss": 0.3022, + "step": 21490, + "teacher_loss": 0.2771047055721283 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.5472970008850098, + "learning_rate": 6.090869876007543e-06, + "loss": 0.3557, + "step": 21491, + "teacher_loss": 0.33446234464645386 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.3187077045440674, + "learning_rate": 6.089042739877255e-06, + "loss": 0.1735, + "step": 21492, + "teacher_loss": 0.15733088552951813 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.5366401076316833, + "learning_rate": 6.087215808048496e-06, + "loss": 0.247, + "step": 21493, + "teacher_loss": 0.21485620737075806 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.21971789002418518, + "learning_rate": 6.085389080563155e-06, + "loss": 0.2024, + "step": 21494, + "teacher_loss": 0.20045116543769836 + }, + { + "compression_loss": 0.0, + "epoch": 3.88, + "label_loss": 0.5270112156867981, + "learning_rate": 6.083562557463121e-06, + "loss": 0.2658, + "step": 21495, + "teacher_loss": 0.23676815629005432 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.22443246841430664, + "learning_rate": 6.08173623879026e-06, + "loss": 0.1647, + "step": 21496, + "teacher_loss": 0.15806543827056885 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.388766884803772, + "learning_rate": 6.079910124586448e-06, + "loss": 0.224, + "step": 21497, + "teacher_loss": 0.20573511719703674 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.26085931062698364, + "learning_rate": 6.078084214893559e-06, + "loss": 0.2179, + "step": 21498, + "teacher_loss": 0.2131008505821228 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.6068795323371887, + "learning_rate": 6.076258509753444e-06, + "loss": 0.2429, + "step": 21499, + "teacher_loss": 0.20245599746704102 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.17952454090118408, + "learning_rate": 6.074433009207967e-06, + "loss": 0.2165, + "step": 21500, + "teacher_loss": 0.22061416506767273 + }, + { + "epoch": 3.89, + "eval_exact_match": 80.60548722800378, + "eval_f1": 87.91853036863203, + "step": 21500 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.17898796498775482, + "learning_rate": 6.0726077132989844e-06, + "loss": 0.1551, + "step": 21501, + "teacher_loss": 0.1524914801120758 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.6426769495010376, + "learning_rate": 6.070782622068338e-06, + "loss": 0.2848, + "step": 21502, + "teacher_loss": 0.24501320719718933 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.5454964637756348, + "learning_rate": 6.068957735557873e-06, + "loss": 0.252, + "step": 21503, + "teacher_loss": 0.21938394010066986 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.5013855695724487, + "learning_rate": 6.0671330538094385e-06, + "loss": 0.2894, + "step": 21504, + "teacher_loss": 0.2657957077026367 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.8262927532196045, + "learning_rate": 6.065308576864859e-06, + "loss": 0.3471, + "step": 21505, + "teacher_loss": 0.29384446144104004 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.46700114011764526, + "learning_rate": 6.063484304765956e-06, + "loss": 0.2606, + "step": 21506, + "teacher_loss": 0.23763345181941986 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 1.2278504371643066, + "learning_rate": 6.061660237554576e-06, + "loss": 0.309, + "step": 21507, + "teacher_loss": 0.206892192363739 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.4533717632293701, + "learning_rate": 6.059836375272526e-06, + "loss": 0.1904, + "step": 21508, + "teacher_loss": 0.16122302412986755 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.3411828875541687, + "learning_rate": 6.058012717961621e-06, + "loss": 0.2128, + "step": 21509, + "teacher_loss": 0.1985655128955841 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.574245810508728, + "learning_rate": 6.056189265663674e-06, + "loss": 0.2399, + "step": 21510, + "teacher_loss": 0.20279093086719513 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.19872868061065674, + "learning_rate": 6.054366018420499e-06, + "loss": 0.2366, + "step": 21511, + "teacher_loss": 0.24076414108276367 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.46273842453956604, + "learning_rate": 6.052542976273882e-06, + "loss": 0.2612, + "step": 21512, + "teacher_loss": 0.23883458971977234 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.4668371081352234, + "learning_rate": 6.050720139265631e-06, + "loss": 0.2164, + "step": 21513, + "teacher_loss": 0.18861359357833862 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.690606951713562, + "learning_rate": 6.048897507437543e-06, + "loss": 0.3609, + "step": 21514, + "teacher_loss": 0.3242151141166687 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.6028565168380737, + "learning_rate": 6.047075080831395e-06, + "loss": 0.2307, + "step": 21515, + "teacher_loss": 0.18936854600906372 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.19261498749256134, + "learning_rate": 6.045252859488965e-06, + "loss": 0.1777, + "step": 21516, + "teacher_loss": 0.17602989077568054 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.43819937109947205, + "learning_rate": 6.043430843452049e-06, + "loss": 0.1818, + "step": 21517, + "teacher_loss": 0.1532726287841797 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.3872084617614746, + "learning_rate": 6.04160903276241e-06, + "loss": 0.2306, + "step": 21518, + "teacher_loss": 0.21323958039283752 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.4040103554725647, + "learning_rate": 6.039787427461806e-06, + "loss": 0.203, + "step": 21519, + "teacher_loss": 0.180661141872406 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.5683060884475708, + "learning_rate": 6.0379660275920245e-06, + "loss": 0.6271, + "step": 21520, + "teacher_loss": 0.6335958242416382 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.3333659768104553, + "learning_rate": 6.0361448331948125e-06, + "loss": 0.1466, + "step": 21521, + "teacher_loss": 0.1258769929409027 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.19274571537971497, + "learning_rate": 6.034323844311917e-06, + "loss": 0.1551, + "step": 21522, + "teacher_loss": 0.15088623762130737 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.5214722752571106, + "learning_rate": 6.0325030609850976e-06, + "loss": 0.2909, + "step": 21523, + "teacher_loss": 0.2653363347053528 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.40102940797805786, + "learning_rate": 6.030682483256101e-06, + "loss": 0.3611, + "step": 21524, + "teacher_loss": 0.35662397742271423 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.38145458698272705, + "learning_rate": 6.028862111166657e-06, + "loss": 0.213, + "step": 21525, + "teacher_loss": 0.1943264603614807 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.6443749666213989, + "learning_rate": 6.027041944758511e-06, + "loss": 0.2827, + "step": 21526, + "teacher_loss": 0.2424602508544922 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.6635802984237671, + "learning_rate": 6.025221984073394e-06, + "loss": 0.3144, + "step": 21527, + "teacher_loss": 0.27563968300819397 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.4399562478065491, + "learning_rate": 6.023402229153024e-06, + "loss": 0.2411, + "step": 21528, + "teacher_loss": 0.2189868688583374 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.4511321187019348, + "learning_rate": 6.02158268003913e-06, + "loss": 0.2645, + "step": 21529, + "teacher_loss": 0.24379494786262512 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.38995757699012756, + "learning_rate": 6.019763336773429e-06, + "loss": 0.225, + "step": 21530, + "teacher_loss": 0.20668691396713257 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.21348296105861664, + "learning_rate": 6.0179441993976315e-06, + "loss": 0.1601, + "step": 21531, + "teacher_loss": 0.15411344170570374 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.6203569173812866, + "learning_rate": 6.016125267953439e-06, + "loss": 0.2786, + "step": 21532, + "teacher_loss": 0.24059879779815674 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.28592991828918457, + "learning_rate": 6.0143065424825585e-06, + "loss": 0.2428, + "step": 21533, + "teacher_loss": 0.2380424439907074 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.31097495555877686, + "learning_rate": 6.012488023026694e-06, + "loss": 0.1564, + "step": 21534, + "teacher_loss": 0.1391746997833252 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.79007887840271, + "learning_rate": 6.010669709627529e-06, + "loss": 0.5178, + "step": 21535, + "teacher_loss": 0.4875562787055969 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.3001028299331665, + "learning_rate": 6.0088516023267546e-06, + "loss": 0.2128, + "step": 21536, + "teacher_loss": 0.20311081409454346 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.2174590826034546, + "learning_rate": 6.007033701166061e-06, + "loss": 0.2191, + "step": 21537, + "teacher_loss": 0.21932193636894226 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.36982911825180054, + "learning_rate": 6.005216006187118e-06, + "loss": 0.2851, + "step": 21538, + "teacher_loss": 0.27572688460350037 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.594820499420166, + "learning_rate": 6.003398517431604e-06, + "loss": 0.2289, + "step": 21539, + "teacher_loss": 0.18825441598892212 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.5105787515640259, + "learning_rate": 6.001581234941194e-06, + "loss": 0.2157, + "step": 21540, + "teacher_loss": 0.18293002247810364 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.23383063077926636, + "learning_rate": 5.999764158757543e-06, + "loss": 0.1922, + "step": 21541, + "teacher_loss": 0.18755602836608887 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.291191041469574, + "learning_rate": 5.99794728892232e-06, + "loss": 0.2323, + "step": 21542, + "teacher_loss": 0.2257024198770523 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.7122907638549805, + "learning_rate": 5.996130625477171e-06, + "loss": 0.5777, + "step": 21543, + "teacher_loss": 0.5627931356430054 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.31656503677368164, + "learning_rate": 5.994314168463752e-06, + "loss": 0.1761, + "step": 21544, + "teacher_loss": 0.16045910120010376 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.3985392451286316, + "learning_rate": 5.992497917923715e-06, + "loss": 0.3391, + "step": 21545, + "teacher_loss": 0.33246538043022156 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.576453685760498, + "learning_rate": 5.990681873898688e-06, + "loss": 0.3614, + "step": 21546, + "teacher_loss": 0.3374585509300232 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.14430472254753113, + "learning_rate": 5.988866036430314e-06, + "loss": 0.1347, + "step": 21547, + "teacher_loss": 0.133660227060318 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.3273911476135254, + "learning_rate": 5.987050405560233e-06, + "loss": 0.1659, + "step": 21548, + "teacher_loss": 0.14794358611106873 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.22120609879493713, + "learning_rate": 5.985234981330056e-06, + "loss": 0.1605, + "step": 21549, + "teacher_loss": 0.1537349820137024 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.26090455055236816, + "learning_rate": 5.983419763781415e-06, + "loss": 0.2047, + "step": 21550, + "teacher_loss": 0.19847427308559418 + }, + { + "compression_loss": 0.0, + "epoch": 3.89, + "label_loss": 0.34017497301101685, + "learning_rate": 5.981604752955928e-06, + "loss": 0.2317, + "step": 21551, + "teacher_loss": 0.21969184279441833 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.4736497402191162, + "learning_rate": 5.979789948895203e-06, + "loss": 0.2251, + "step": 21552, + "teacher_loss": 0.19744724035263062 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.6700502634048462, + "learning_rate": 5.977975351640856e-06, + "loss": 0.2863, + "step": 21553, + "teacher_loss": 0.2436957210302353 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.4106016755104065, + "learning_rate": 5.976160961234477e-06, + "loss": 0.2949, + "step": 21554, + "teacher_loss": 0.28209900856018066 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.2841656804084778, + "learning_rate": 5.9743467777176795e-06, + "loss": 0.429, + "step": 21555, + "teacher_loss": 0.44505560398101807 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.8621479868888855, + "learning_rate": 5.972532801132044e-06, + "loss": 0.2728, + "step": 21556, + "teacher_loss": 0.2072819173336029 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.36448854207992554, + "learning_rate": 5.970719031519167e-06, + "loss": 0.2757, + "step": 21557, + "teacher_loss": 0.2658294141292572 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.2556465268135071, + "learning_rate": 5.968905468920635e-06, + "loss": 0.2029, + "step": 21558, + "teacher_loss": 0.19705471396446228 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.5616415739059448, + "learning_rate": 5.9670921133780195e-06, + "loss": 0.1884, + "step": 21559, + "teacher_loss": 0.14696058630943298 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.30442333221435547, + "learning_rate": 5.9652789649329e-06, + "loss": 0.2593, + "step": 21560, + "teacher_loss": 0.2543134093284607 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.3682350516319275, + "learning_rate": 5.963466023626851e-06, + "loss": 0.1647, + "step": 21561, + "teacher_loss": 0.142124742269516 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.2007249891757965, + "learning_rate": 5.9616532895014286e-06, + "loss": 0.2226, + "step": 21562, + "teacher_loss": 0.2250109314918518 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.21461153030395508, + "learning_rate": 5.959840762598197e-06, + "loss": 0.2866, + "step": 21563, + "teacher_loss": 0.2945913076400757 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.6896828413009644, + "learning_rate": 5.9580284429587185e-06, + "loss": 0.2448, + "step": 21564, + "teacher_loss": 0.1953839361667633 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.39658069610595703, + "learning_rate": 5.956216330624539e-06, + "loss": 0.2461, + "step": 21565, + "teacher_loss": 0.22942045331001282 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.6749616265296936, + "learning_rate": 5.954404425637194e-06, + "loss": 0.23, + "step": 21566, + "teacher_loss": 0.1805938184261322 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.1674351692199707, + "learning_rate": 5.9525927280382455e-06, + "loss": 0.1839, + "step": 21567, + "teacher_loss": 0.18576158583164215 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.9018399715423584, + "learning_rate": 5.950781237869219e-06, + "loss": 0.5499, + "step": 21568, + "teacher_loss": 0.5108265280723572 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.30842745304107666, + "learning_rate": 5.948969955171639e-06, + "loss": 0.1817, + "step": 21569, + "teacher_loss": 0.16763754189014435 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 1.2110718488693237, + "learning_rate": 5.947158879987052e-06, + "loss": 0.3147, + "step": 21570, + "teacher_loss": 0.21513602137565613 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.24220135807991028, + "learning_rate": 5.945348012356969e-06, + "loss": 0.1807, + "step": 21571, + "teacher_loss": 0.1738283485174179 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.3856716752052307, + "learning_rate": 5.943537352322903e-06, + "loss": 0.1967, + "step": 21572, + "teacher_loss": 0.1757173240184784 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.7092587947845459, + "learning_rate": 5.941726899926375e-06, + "loss": 0.2608, + "step": 21573, + "teacher_loss": 0.2109902799129486 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.4329985976219177, + "learning_rate": 5.939916655208895e-06, + "loss": 0.1621, + "step": 21574, + "teacher_loss": 0.1320222169160843 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.49796581268310547, + "learning_rate": 5.9381066182119565e-06, + "loss": 0.2142, + "step": 21575, + "teacher_loss": 0.1826179027557373 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.25970178842544556, + "learning_rate": 5.936296788977065e-06, + "loss": 0.2806, + "step": 21576, + "teacher_loss": 0.2828960418701172 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.47879883646965027, + "learning_rate": 5.93448716754572e-06, + "loss": 0.1613, + "step": 21577, + "teacher_loss": 0.1260617971420288 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.2591463327407837, + "learning_rate": 5.932677753959405e-06, + "loss": 0.2474, + "step": 21578, + "teacher_loss": 0.24609911441802979 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.17357288300991058, + "learning_rate": 5.93086854825959e-06, + "loss": 0.1894, + "step": 21579, + "teacher_loss": 0.19119200110435486 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.3128025531768799, + "learning_rate": 5.929059550487782e-06, + "loss": 0.1902, + "step": 21580, + "teacher_loss": 0.1765539050102234 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.3320736289024353, + "learning_rate": 5.927250760685441e-06, + "loss": 0.2052, + "step": 21581, + "teacher_loss": 0.19105927646160126 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.5902751088142395, + "learning_rate": 5.9254421788940335e-06, + "loss": 0.2577, + "step": 21582, + "teacher_loss": 0.22069881856441498 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.3179823160171509, + "learning_rate": 5.923633805155032e-06, + "loss": 0.1593, + "step": 21583, + "teacher_loss": 0.14171132445335388 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.471457302570343, + "learning_rate": 5.921825639509898e-06, + "loss": 0.2273, + "step": 21584, + "teacher_loss": 0.2001430094242096 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.09451904892921448, + "learning_rate": 5.920017682000078e-06, + "loss": 0.1473, + "step": 21585, + "teacher_loss": 0.15319198369979858 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.2655085325241089, + "learning_rate": 5.918209932667031e-06, + "loss": 0.1933, + "step": 21586, + "teacher_loss": 0.18524982035160065 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.238729327917099, + "learning_rate": 5.916402391552207e-06, + "loss": 0.1531, + "step": 21587, + "teacher_loss": 0.14356687664985657 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.2084427922964096, + "learning_rate": 5.914595058697037e-06, + "loss": 0.2039, + "step": 21588, + "teacher_loss": 0.20335537195205688 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.5805263519287109, + "learning_rate": 5.912787934142962e-06, + "loss": 0.237, + "step": 21589, + "teacher_loss": 0.1987752914428711 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.5270664095878601, + "learning_rate": 5.910981017931422e-06, + "loss": 0.2363, + "step": 21590, + "teacher_loss": 0.2040441632270813 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.10751629620790482, + "learning_rate": 5.90917431010383e-06, + "loss": 0.1752, + "step": 21591, + "teacher_loss": 0.18277347087860107 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.503272533416748, + "learning_rate": 5.907367810701615e-06, + "loss": 0.2367, + "step": 21592, + "teacher_loss": 0.20710471272468567 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.3892131447792053, + "learning_rate": 5.9055615197662e-06, + "loss": 0.2686, + "step": 21593, + "teacher_loss": 0.25521424412727356 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.4386591613292694, + "learning_rate": 5.9037554373389954e-06, + "loss": 0.1855, + "step": 21594, + "teacher_loss": 0.15737372636795044 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.8190631866455078, + "learning_rate": 5.901949563461399e-06, + "loss": 0.263, + "step": 21595, + "teacher_loss": 0.2012414187192917 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.19345691800117493, + "learning_rate": 5.900143898174822e-06, + "loss": 0.1821, + "step": 21596, + "teacher_loss": 0.18083441257476807 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.8136383295059204, + "learning_rate": 5.8983384415206685e-06, + "loss": 0.3277, + "step": 21597, + "teacher_loss": 0.27368173003196716 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.2521097958087921, + "learning_rate": 5.89653319354032e-06, + "loss": 0.2449, + "step": 21598, + "teacher_loss": 0.24408583343029022 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.6602687239646912, + "learning_rate": 5.894728154275173e-06, + "loss": 0.3157, + "step": 21599, + "teacher_loss": 0.2774094343185425 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.46900177001953125, + "learning_rate": 5.892923323766615e-06, + "loss": 0.263, + "step": 21600, + "teacher_loss": 0.2401224821805954 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.479117751121521, + "learning_rate": 5.891118702056017e-06, + "loss": 0.2792, + "step": 21601, + "teacher_loss": 0.25693657994270325 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.25928887724876404, + "learning_rate": 5.889314289184755e-06, + "loss": 0.2592, + "step": 21602, + "teacher_loss": 0.2591739296913147 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.6671919822692871, + "learning_rate": 5.887510085194208e-06, + "loss": 0.2252, + "step": 21603, + "teacher_loss": 0.1760404258966446 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.3644455671310425, + "learning_rate": 5.885706090125728e-06, + "loss": 0.2489, + "step": 21604, + "teacher_loss": 0.23601654171943665 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.6150169968605042, + "learning_rate": 5.883902304020687e-06, + "loss": 0.1691, + "step": 21605, + "teacher_loss": 0.11956362426280975 + }, + { + "compression_loss": 0.0, + "epoch": 3.9, + "label_loss": 0.24721619486808777, + "learning_rate": 5.882098726920429e-06, + "loss": 0.2016, + "step": 21606, + "teacher_loss": 0.19655998051166534 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.4026600122451782, + "learning_rate": 5.88029535886631e-06, + "loss": 0.2141, + "step": 21607, + "teacher_loss": 0.1931876838207245 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.5333244204521179, + "learning_rate": 5.8784921998996836e-06, + "loss": 0.3039, + "step": 21608, + "teacher_loss": 0.27839308977127075 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.46213236451148987, + "learning_rate": 5.8766892500618755e-06, + "loss": 0.2272, + "step": 21609, + "teacher_loss": 0.20106427371501923 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.6251480579376221, + "learning_rate": 5.8748865093942315e-06, + "loss": 0.2236, + "step": 21610, + "teacher_loss": 0.17902909219264984 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.3579678535461426, + "learning_rate": 5.873083977938084e-06, + "loss": 0.2606, + "step": 21611, + "teacher_loss": 0.2498040497303009 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.711675763130188, + "learning_rate": 5.8712816557347546e-06, + "loss": 0.2288, + "step": 21612, + "teacher_loss": 0.1750963032245636 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.3176117241382599, + "learning_rate": 5.869479542825566e-06, + "loss": 0.2373, + "step": 21613, + "teacher_loss": 0.22834038734436035 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.35230937600135803, + "learning_rate": 5.867677639251844e-06, + "loss": 0.2067, + "step": 21614, + "teacher_loss": 0.19054833054542542 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.43324583768844604, + "learning_rate": 5.865875945054891e-06, + "loss": 0.1973, + "step": 21615, + "teacher_loss": 0.1710677295923233 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.345924437046051, + "learning_rate": 5.864074460276012e-06, + "loss": 0.2297, + "step": 21616, + "teacher_loss": 0.21675482392311096 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.2902829349040985, + "learning_rate": 5.862273184956515e-06, + "loss": 0.158, + "step": 21617, + "teacher_loss": 0.1433359682559967 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.3983793258666992, + "learning_rate": 5.860472119137703e-06, + "loss": 0.2064, + "step": 21618, + "teacher_loss": 0.18507874011993408 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.5508131384849548, + "learning_rate": 5.85867126286086e-06, + "loss": 0.2973, + "step": 21619, + "teacher_loss": 0.2691289782524109 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.4712649881839752, + "learning_rate": 5.856870616167277e-06, + "loss": 0.1931, + "step": 21620, + "teacher_loss": 0.16214478015899658 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.24591794610023499, + "learning_rate": 5.855070179098243e-06, + "loss": 0.2419, + "step": 21621, + "teacher_loss": 0.2414049208164215 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.3579743206501007, + "learning_rate": 5.853269951695026e-06, + "loss": 0.2639, + "step": 21622, + "teacher_loss": 0.2534680664539337 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.18500937521457672, + "learning_rate": 5.851469933998907e-06, + "loss": 0.1945, + "step": 21623, + "teacher_loss": 0.19550201296806335 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.21116414666175842, + "learning_rate": 5.849670126051159e-06, + "loss": 0.2002, + "step": 21624, + "teacher_loss": 0.19899138808250427 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.26453685760498047, + "learning_rate": 5.8478705278930335e-06, + "loss": 0.1918, + "step": 21625, + "teacher_loss": 0.18369004130363464 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.5787939429283142, + "learning_rate": 5.846071139565799e-06, + "loss": 0.2792, + "step": 21626, + "teacher_loss": 0.24590596556663513 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.19722458720207214, + "learning_rate": 5.844271961110713e-06, + "loss": 0.2417, + "step": 21627, + "teacher_loss": 0.2466726005077362 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.1357852816581726, + "learning_rate": 5.842472992569021e-06, + "loss": 0.1818, + "step": 21628, + "teacher_loss": 0.18696370720863342 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.5399938225746155, + "learning_rate": 5.840674233981957e-06, + "loss": 0.2214, + "step": 21629, + "teacher_loss": 0.1860440969467163 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.3106970191001892, + "learning_rate": 5.838875685390783e-06, + "loss": 0.1739, + "step": 21630, + "teacher_loss": 0.1586693525314331 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.4684392809867859, + "learning_rate": 5.837077346836722e-06, + "loss": 0.2417, + "step": 21631, + "teacher_loss": 0.21649707853794098 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.3990427851676941, + "learning_rate": 5.835279218361001e-06, + "loss": 0.1749, + "step": 21632, + "teacher_loss": 0.15000857412815094 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.2696496844291687, + "learning_rate": 5.83348130000485e-06, + "loss": 0.17, + "step": 21633, + "teacher_loss": 0.15894511342048645 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.7739237546920776, + "learning_rate": 5.831683591809494e-06, + "loss": 0.3032, + "step": 21634, + "teacher_loss": 0.25093701481819153 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.47465670108795166, + "learning_rate": 5.829886093816143e-06, + "loss": 0.2106, + "step": 21635, + "teacher_loss": 0.18124036490917206 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.11065588146448135, + "learning_rate": 5.828088806066007e-06, + "loss": 0.2025, + "step": 21636, + "teacher_loss": 0.21266357600688934 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 1.2231824398040771, + "learning_rate": 5.826291728600303e-06, + "loss": 0.2418, + "step": 21637, + "teacher_loss": 0.13277727365493774 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.912986159324646, + "learning_rate": 5.824494861460226e-06, + "loss": 0.3002, + "step": 21638, + "teacher_loss": 0.2321140319108963 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.5347219705581665, + "learning_rate": 5.8226982046869615e-06, + "loss": 0.1926, + "step": 21639, + "teacher_loss": 0.15454307198524475 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.2978341579437256, + "learning_rate": 5.820901758321721e-06, + "loss": 0.2099, + "step": 21640, + "teacher_loss": 0.20011213421821594 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.23521390557289124, + "learning_rate": 5.819105522405684e-06, + "loss": 0.2276, + "step": 21641, + "teacher_loss": 0.22670245170593262 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.3059546947479248, + "learning_rate": 5.817309496980021e-06, + "loss": 0.2283, + "step": 21642, + "teacher_loss": 0.219665989279747 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.27937451004981995, + "learning_rate": 5.815513682085931e-06, + "loss": 0.3245, + "step": 21643, + "teacher_loss": 0.32946592569351196 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.44344207644462585, + "learning_rate": 5.813718077764576e-06, + "loss": 0.2901, + "step": 21644, + "teacher_loss": 0.27307188510894775 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.9404531121253967, + "learning_rate": 5.811922684057118e-06, + "loss": 0.3215, + "step": 21645, + "teacher_loss": 0.252686083316803 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.36817511916160583, + "learning_rate": 5.810127501004726e-06, + "loss": 0.1936, + "step": 21646, + "teacher_loss": 0.17422160506248474 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.5702496767044067, + "learning_rate": 5.8083325286485615e-06, + "loss": 0.2656, + "step": 21647, + "teacher_loss": 0.2317364513874054 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.5612221956253052, + "learning_rate": 5.80653776702977e-06, + "loss": 0.2477, + "step": 21648, + "teacher_loss": 0.21284985542297363 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.327936589717865, + "learning_rate": 5.804743216189504e-06, + "loss": 0.1785, + "step": 21649, + "teacher_loss": 0.16184954345226288 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.572303295135498, + "learning_rate": 5.802948876168912e-06, + "loss": 0.2176, + "step": 21650, + "teacher_loss": 0.17815014719963074 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.2846210300922394, + "learning_rate": 5.8011547470091224e-06, + "loss": 0.3121, + "step": 21651, + "teacher_loss": 0.31512880325317383 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.20536591112613678, + "learning_rate": 5.7993608287512756e-06, + "loss": 0.22, + "step": 21652, + "teacher_loss": 0.2216547727584839 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.8416795134544373, + "learning_rate": 5.797567121436505e-06, + "loss": 0.365, + "step": 21653, + "teacher_loss": 0.31208163499832153 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.34806978702545166, + "learning_rate": 5.7957736251059265e-06, + "loss": 0.3385, + "step": 21654, + "teacher_loss": 0.3374047875404358 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.31881096959114075, + "learning_rate": 5.793980339800666e-06, + "loss": 0.2143, + "step": 21655, + "teacher_loss": 0.20268628001213074 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.16841164231300354, + "learning_rate": 5.792187265561831e-06, + "loss": 0.1811, + "step": 21656, + "teacher_loss": 0.18250912427902222 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.17222747206687927, + "learning_rate": 5.79039440243054e-06, + "loss": 0.1196, + "step": 21657, + "teacher_loss": 0.11379311978816986 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.29466214776039124, + "learning_rate": 5.7886017504478896e-06, + "loss": 0.2291, + "step": 21658, + "teacher_loss": 0.2217944860458374 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.4614105224609375, + "learning_rate": 5.786809309654983e-06, + "loss": 0.247, + "step": 21659, + "teacher_loss": 0.22321642935276031 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.4673064053058624, + "learning_rate": 5.785017080092921e-06, + "loss": 0.2022, + "step": 21660, + "teacher_loss": 0.17278021574020386 + }, + { + "compression_loss": 0.0, + "epoch": 3.91, + "label_loss": 0.6787277460098267, + "learning_rate": 5.783225061802786e-06, + "loss": 0.2876, + "step": 21661, + "teacher_loss": 0.24414291977882385 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.44859573245048523, + "learning_rate": 5.781433254825666e-06, + "loss": 0.2384, + "step": 21662, + "teacher_loss": 0.2150963395833969 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.3178305923938751, + "learning_rate": 5.779641659202648e-06, + "loss": 0.1844, + "step": 21663, + "teacher_loss": 0.1695285439491272 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.48061874508857727, + "learning_rate": 5.777850274974797e-06, + "loss": 0.2806, + "step": 21664, + "teacher_loss": 0.25834590196609497 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.4110620617866516, + "learning_rate": 5.776059102183194e-06, + "loss": 0.2221, + "step": 21665, + "teacher_loss": 0.20107778906822205 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.18785390257835388, + "learning_rate": 5.774268140868897e-06, + "loss": 0.2042, + "step": 21666, + "teacher_loss": 0.20604942739009857 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.5896732807159424, + "learning_rate": 5.772477391072972e-06, + "loss": 0.2453, + "step": 21667, + "teacher_loss": 0.20706912875175476 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.35474371910095215, + "learning_rate": 5.77068685283648e-06, + "loss": 0.2741, + "step": 21668, + "teacher_loss": 0.26514932513237 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.8012501001358032, + "learning_rate": 5.7688965262004625e-06, + "loss": 0.3274, + "step": 21669, + "teacher_loss": 0.2747178077697754 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.45436084270477295, + "learning_rate": 5.7671064112059695e-06, + "loss": 0.2584, + "step": 21670, + "teacher_loss": 0.23663340508937836 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.49641603231430054, + "learning_rate": 5.765316507894052e-06, + "loss": 0.2174, + "step": 21671, + "teacher_loss": 0.18639829754829407 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.6210223436355591, + "learning_rate": 5.763526816305734e-06, + "loss": 0.2618, + "step": 21672, + "teacher_loss": 0.2218715101480484 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.44714102149009705, + "learning_rate": 5.761737336482054e-06, + "loss": 0.3006, + "step": 21673, + "teacher_loss": 0.28429192304611206 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.2280219942331314, + "learning_rate": 5.759948068464044e-06, + "loss": 0.1613, + "step": 21674, + "teacher_loss": 0.15390491485595703 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.23498138785362244, + "learning_rate": 5.758159012292717e-06, + "loss": 0.1762, + "step": 21675, + "teacher_loss": 0.16965194046497345 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.33354857563972473, + "learning_rate": 5.7563701680090995e-06, + "loss": 0.1639, + "step": 21676, + "teacher_loss": 0.14507059752941132 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.3858204185962677, + "learning_rate": 5.754581535654196e-06, + "loss": 0.1567, + "step": 21677, + "teacher_loss": 0.1312216967344284 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.7607284188270569, + "learning_rate": 5.752793115269023e-06, + "loss": 0.2977, + "step": 21678, + "teacher_loss": 0.2463020235300064 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.4339180588722229, + "learning_rate": 5.7510049068945755e-06, + "loss": 0.2399, + "step": 21679, + "teacher_loss": 0.21835613250732422 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.24386480450630188, + "learning_rate": 5.749216910571854e-06, + "loss": 0.2023, + "step": 21680, + "teacher_loss": 0.1976274847984314 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.5869287252426147, + "learning_rate": 5.747429126341859e-06, + "loss": 0.293, + "step": 21681, + "teacher_loss": 0.26029354333877563 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.388175904750824, + "learning_rate": 5.745641554245569e-06, + "loss": 0.2242, + "step": 21682, + "teacher_loss": 0.20600861310958862 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.27570071816444397, + "learning_rate": 5.743854194323972e-06, + "loss": 0.2267, + "step": 21683, + "teacher_loss": 0.2212313711643219 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.09506860375404358, + "learning_rate": 5.742067046618052e-06, + "loss": 0.1422, + "step": 21684, + "teacher_loss": 0.14746659994125366 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.8266760110855103, + "learning_rate": 5.7402801111687725e-06, + "loss": 0.2364, + "step": 21685, + "teacher_loss": 0.17083081603050232 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.17504079639911652, + "learning_rate": 5.738493388017108e-06, + "loss": 0.169, + "step": 21686, + "teacher_loss": 0.16837219893932343 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.19514837861061096, + "learning_rate": 5.736706877204029e-06, + "loss": 0.1574, + "step": 21687, + "teacher_loss": 0.15325510501861572 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.2866716682910919, + "learning_rate": 5.734920578770486e-06, + "loss": 0.2433, + "step": 21688, + "teacher_loss": 0.23853181302547455 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.7264032959938049, + "learning_rate": 5.7331344927574274e-06, + "loss": 0.3449, + "step": 21689, + "teacher_loss": 0.3025277256965637 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.6957714557647705, + "learning_rate": 5.731348619205822e-06, + "loss": 0.2894, + "step": 21690, + "teacher_loss": 0.2442837655544281 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.3914807140827179, + "learning_rate": 5.7295629581566035e-06, + "loss": 0.2373, + "step": 21691, + "teacher_loss": 0.22014504671096802 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.38368386030197144, + "learning_rate": 5.727777509650701e-06, + "loss": 0.1622, + "step": 21692, + "teacher_loss": 0.1375441551208496 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.3213942050933838, + "learning_rate": 5.725992273729073e-06, + "loss": 0.2255, + "step": 21693, + "teacher_loss": 0.21482135355472565 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.3944419026374817, + "learning_rate": 5.724207250432635e-06, + "loss": 0.2244, + "step": 21694, + "teacher_loss": 0.20546674728393555 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.4395279288291931, + "learning_rate": 5.72242243980231e-06, + "loss": 0.1794, + "step": 21695, + "teacher_loss": 0.15044797956943512 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.5526009798049927, + "learning_rate": 5.720637841879024e-06, + "loss": 0.2504, + "step": 21696, + "teacher_loss": 0.21684645116329193 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.3428601324558258, + "learning_rate": 5.718853456703696e-06, + "loss": 0.2316, + "step": 21697, + "teacher_loss": 0.21926641464233398 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.45548972487449646, + "learning_rate": 5.717069284317225e-06, + "loss": 0.1613, + "step": 21698, + "teacher_loss": 0.12860971689224243 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.8189011812210083, + "learning_rate": 5.715285324760525e-06, + "loss": 0.213, + "step": 21699, + "teacher_loss": 0.14563071727752686 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.3457135558128357, + "learning_rate": 5.7135015780745e-06, + "loss": 0.2774, + "step": 21700, + "teacher_loss": 0.269858717918396 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.12041258811950684, + "learning_rate": 5.7117180443000425e-06, + "loss": 0.1715, + "step": 21701, + "teacher_loss": 0.17721140384674072 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.18133969604969025, + "learning_rate": 5.7099347234780315e-06, + "loss": 0.1473, + "step": 21702, + "teacher_loss": 0.1435309648513794 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.3937842845916748, + "learning_rate": 5.708151615649375e-06, + "loss": 0.1895, + "step": 21703, + "teacher_loss": 0.16677403450012207 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.4104272723197937, + "learning_rate": 5.706368720854943e-06, + "loss": 0.2521, + "step": 21704, + "teacher_loss": 0.23454910516738892 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.46406644582748413, + "learning_rate": 5.704586039135608e-06, + "loss": 0.216, + "step": 21705, + "teacher_loss": 0.18847134709358215 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.4580375552177429, + "learning_rate": 5.702803570532245e-06, + "loss": 0.208, + "step": 21706, + "teacher_loss": 0.1801944077014923 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.2542845606803894, + "learning_rate": 5.701021315085728e-06, + "loss": 0.1832, + "step": 21707, + "teacher_loss": 0.17528820037841797 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.3979668617248535, + "learning_rate": 5.699239272836907e-06, + "loss": 0.2292, + "step": 21708, + "teacher_loss": 0.21046772599220276 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.30313894152641296, + "learning_rate": 5.697457443826644e-06, + "loss": 0.1717, + "step": 21709, + "teacher_loss": 0.15709473192691803 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.38411879539489746, + "learning_rate": 5.695675828095797e-06, + "loss": 0.2681, + "step": 21710, + "teacher_loss": 0.25517305731773376 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.24414892494678497, + "learning_rate": 5.6938944256852025e-06, + "loss": 0.1488, + "step": 21711, + "teacher_loss": 0.1382075846195221 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.6691270470619202, + "learning_rate": 5.692113236635706e-06, + "loss": 0.2393, + "step": 21712, + "teacher_loss": 0.19156885147094727 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.42486846446990967, + "learning_rate": 5.690332260988152e-06, + "loss": 0.17, + "step": 21713, + "teacher_loss": 0.1416792869567871 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.21350818872451782, + "learning_rate": 5.688551498783364e-06, + "loss": 0.2314, + "step": 21714, + "teacher_loss": 0.23338176310062408 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.6247075200080872, + "learning_rate": 5.686770950062177e-06, + "loss": 0.2873, + "step": 21715, + "teacher_loss": 0.2498628944158554 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.22747118771076202, + "learning_rate": 5.684990614865406e-06, + "loss": 0.1412, + "step": 21716, + "teacher_loss": 0.13165727257728577 + }, + { + "compression_loss": 0.0, + "epoch": 3.92, + "label_loss": 0.6841297149658203, + "learning_rate": 5.68321049323387e-06, + "loss": 0.2239, + "step": 21717, + "teacher_loss": 0.17275935411453247 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.22954882681369781, + "learning_rate": 5.681430585208391e-06, + "loss": 0.1863, + "step": 21718, + "teacher_loss": 0.18147417902946472 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.3274756371974945, + "learning_rate": 5.679650890829763e-06, + "loss": 0.2014, + "step": 21719, + "teacher_loss": 0.18739989399909973 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.33712196350097656, + "learning_rate": 5.677871410138804e-06, + "loss": 0.1734, + "step": 21720, + "teacher_loss": 0.1551661193370819 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.6797663569450378, + "learning_rate": 5.676092143176297e-06, + "loss": 0.219, + "step": 21721, + "teacher_loss": 0.16781717538833618 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.34490782022476196, + "learning_rate": 5.674313089983044e-06, + "loss": 0.1934, + "step": 21722, + "teacher_loss": 0.17651715874671936 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.6178730130195618, + "learning_rate": 5.672534250599836e-06, + "loss": 0.2021, + "step": 21723, + "teacher_loss": 0.15591385960578918 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.6356312036514282, + "learning_rate": 5.670755625067449e-06, + "loss": 0.2063, + "step": 21724, + "teacher_loss": 0.15861597657203674 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.2869800925254822, + "learning_rate": 5.668977213426664e-06, + "loss": 0.2186, + "step": 21725, + "teacher_loss": 0.21102482080459595 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 1.0469518899917603, + "learning_rate": 5.667199015718261e-06, + "loss": 0.3694, + "step": 21726, + "teacher_loss": 0.29410645365715027 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.8406246900558472, + "learning_rate": 5.665421031982997e-06, + "loss": 0.2424, + "step": 21727, + "teacher_loss": 0.1759752780199051 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.4865397810935974, + "learning_rate": 5.663643262261648e-06, + "loss": 0.1862, + "step": 21728, + "teacher_loss": 0.1527741253376007 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.46112725138664246, + "learning_rate": 5.661865706594963e-06, + "loss": 0.2133, + "step": 21729, + "teacher_loss": 0.185746431350708 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.3609007000923157, + "learning_rate": 5.660088365023699e-06, + "loss": 0.2796, + "step": 21730, + "teacher_loss": 0.2705879211425781 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.5885866284370422, + "learning_rate": 5.658311237588613e-06, + "loss": 0.3239, + "step": 21731, + "teacher_loss": 0.29449528455734253 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.17170582711696625, + "learning_rate": 5.656534324330436e-06, + "loss": 0.1719, + "step": 21732, + "teacher_loss": 0.1719554364681244 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.643259584903717, + "learning_rate": 5.654757625289913e-06, + "loss": 0.324, + "step": 21733, + "teacher_loss": 0.28848567605018616 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.7844679355621338, + "learning_rate": 5.652981140507786e-06, + "loss": 0.2941, + "step": 21734, + "teacher_loss": 0.23958855867385864 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.24841442704200745, + "learning_rate": 5.651204870024772e-06, + "loss": 0.1883, + "step": 21735, + "teacher_loss": 0.18161404132843018 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.3275895118713379, + "learning_rate": 5.649428813881601e-06, + "loss": 0.213, + "step": 21736, + "teacher_loss": 0.20023277401924133 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.26092907786369324, + "learning_rate": 5.647652972118998e-06, + "loss": 0.1217, + "step": 21737, + "teacher_loss": 0.10623326152563095 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.2945171296596527, + "learning_rate": 5.645877344777672e-06, + "loss": 0.1851, + "step": 21738, + "teacher_loss": 0.1729859709739685 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.21960745751857758, + "learning_rate": 5.64410193189833e-06, + "loss": 0.2344, + "step": 21739, + "teacher_loss": 0.2360624074935913 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.5657855272293091, + "learning_rate": 5.642326733521678e-06, + "loss": 0.2475, + "step": 21740, + "teacher_loss": 0.21210221946239471 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.7064233422279358, + "learning_rate": 5.640551749688424e-06, + "loss": 0.3263, + "step": 21741, + "teacher_loss": 0.2841082811355591 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.31813669204711914, + "learning_rate": 5.638776980439253e-06, + "loss": 0.1862, + "step": 21742, + "teacher_loss": 0.17152544856071472 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.24169036746025085, + "learning_rate": 5.6370024258148595e-06, + "loss": 0.1888, + "step": 21743, + "teacher_loss": 0.18289324641227722 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.25272631645202637, + "learning_rate": 5.635228085855934e-06, + "loss": 0.1771, + "step": 21744, + "teacher_loss": 0.1687333583831787 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.2720526456832886, + "learning_rate": 5.633453960603146e-06, + "loss": 0.1485, + "step": 21745, + "teacher_loss": 0.13482016324996948 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.4527941942214966, + "learning_rate": 5.631680050097176e-06, + "loss": 0.2561, + "step": 21746, + "teacher_loss": 0.23424610495567322 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.11322474479675293, + "learning_rate": 5.629906354378699e-06, + "loss": 0.2063, + "step": 21747, + "teacher_loss": 0.2166532576084137 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.2210656702518463, + "learning_rate": 5.628132873488372e-06, + "loss": 0.2498, + "step": 21748, + "teacher_loss": 0.25294333696365356 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.6846473813056946, + "learning_rate": 5.626359607466861e-06, + "loss": 0.2688, + "step": 21749, + "teacher_loss": 0.22262606024742126 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.3647555410861969, + "learning_rate": 5.624586556354824e-06, + "loss": 0.2215, + "step": 21750, + "teacher_loss": 0.20553144812583923 + }, + { + "epoch": 3.93, + "eval_exact_match": 80.50141911069063, + "eval_f1": 87.82737745441388, + "step": 21750 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.6421164274215698, + "learning_rate": 5.622813720192909e-06, + "loss": 0.2168, + "step": 21751, + "teacher_loss": 0.16956683993339539 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.3941727578639984, + "learning_rate": 5.621041099021751e-06, + "loss": 0.2662, + "step": 21752, + "teacher_loss": 0.2520271837711334 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.406181275844574, + "learning_rate": 5.619268692882011e-06, + "loss": 0.1993, + "step": 21753, + "teacher_loss": 0.17625921964645386 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.9459228515625, + "learning_rate": 5.6174965018143145e-06, + "loss": 0.3764, + "step": 21754, + "teacher_loss": 0.3130866289138794 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.43356359004974365, + "learning_rate": 5.61572452585929e-06, + "loss": 0.3113, + "step": 21755, + "teacher_loss": 0.29775160551071167 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.6293652057647705, + "learning_rate": 5.613952765057564e-06, + "loss": 0.3279, + "step": 21756, + "teacher_loss": 0.29445815086364746 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.42724162340164185, + "learning_rate": 5.612181219449766e-06, + "loss": 0.2644, + "step": 21757, + "teacher_loss": 0.2462974339723587 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.23358628153800964, + "learning_rate": 5.610409889076502e-06, + "loss": 0.1699, + "step": 21758, + "teacher_loss": 0.16280022263526917 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.26864466071128845, + "learning_rate": 5.608638773978389e-06, + "loss": 0.1288, + "step": 21759, + "teacher_loss": 0.11324891448020935 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.5911637544631958, + "learning_rate": 5.6068678741960345e-06, + "loss": 0.2722, + "step": 21760, + "teacher_loss": 0.23670923709869385 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.30494260787963867, + "learning_rate": 5.605097189770039e-06, + "loss": 0.1862, + "step": 21761, + "teacher_loss": 0.17301592230796814 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.4083373248577118, + "learning_rate": 5.6033267207409875e-06, + "loss": 0.2313, + "step": 21762, + "teacher_loss": 0.2116081714630127 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.3688318729400635, + "learning_rate": 5.6015564671494924e-06, + "loss": 0.2139, + "step": 21763, + "teacher_loss": 0.19663822650909424 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.5753298997879028, + "learning_rate": 5.599786429036129e-06, + "loss": 0.3095, + "step": 21764, + "teacher_loss": 0.27996665239334106 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.2599407434463501, + "learning_rate": 5.598016606441468e-06, + "loss": 0.2092, + "step": 21765, + "teacher_loss": 0.20357708632946014 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.5110395550727844, + "learning_rate": 5.59624699940611e-06, + "loss": 0.2345, + "step": 21766, + "teacher_loss": 0.20372425019741058 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.7064459919929504, + "learning_rate": 5.594477607970613e-06, + "loss": 0.3742, + "step": 21767, + "teacher_loss": 0.3373359739780426 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.3331388831138611, + "learning_rate": 5.59270843217554e-06, + "loss": 0.1494, + "step": 21768, + "teacher_loss": 0.1290060132741928 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.1701088696718216, + "learning_rate": 5.590939472061459e-06, + "loss": 0.1274, + "step": 21769, + "teacher_loss": 0.12268504500389099 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.4092593193054199, + "learning_rate": 5.58917072766893e-06, + "loss": 0.1735, + "step": 21770, + "teacher_loss": 0.1472693681716919 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.34680867195129395, + "learning_rate": 5.587402199038497e-06, + "loss": 0.2562, + "step": 21771, + "teacher_loss": 0.24616605043411255 + }, + { + "compression_loss": 0.0, + "epoch": 3.93, + "label_loss": 0.6532238721847534, + "learning_rate": 5.585633886210711e-06, + "loss": 0.2809, + "step": 21772, + "teacher_loss": 0.23955005407333374 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.3128882646560669, + "learning_rate": 5.583865789226118e-06, + "loss": 0.2019, + "step": 21773, + "teacher_loss": 0.18954743444919586 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.41160523891448975, + "learning_rate": 5.582097908125249e-06, + "loss": 0.2393, + "step": 21774, + "teacher_loss": 0.22016194462776184 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.687391996383667, + "learning_rate": 5.580330242948636e-06, + "loss": 0.2868, + "step": 21775, + "teacher_loss": 0.24227085709571838 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.16445928812026978, + "learning_rate": 5.578562793736816e-06, + "loss": 0.116, + "step": 21776, + "teacher_loss": 0.1106310710310936 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.2942621409893036, + "learning_rate": 5.576795560530297e-06, + "loss": 0.2749, + "step": 21777, + "teacher_loss": 0.2727093994617462 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.29929181933403015, + "learning_rate": 5.57502854336961e-06, + "loss": 0.1435, + "step": 21778, + "teacher_loss": 0.1261957436800003 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.6811418533325195, + "learning_rate": 5.573261742295257e-06, + "loss": 0.2265, + "step": 21779, + "teacher_loss": 0.17594948410987854 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.29011276364326477, + "learning_rate": 5.571495157347749e-06, + "loss": 0.223, + "step": 21780, + "teacher_loss": 0.215493306517601 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.3197956085205078, + "learning_rate": 5.569728788567593e-06, + "loss": 0.2945, + "step": 21781, + "teacher_loss": 0.2916387617588043 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.4756806194782257, + "learning_rate": 5.567962635995276e-06, + "loss": 0.2883, + "step": 21782, + "teacher_loss": 0.2674769461154938 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.39954444766044617, + "learning_rate": 5.566196699671304e-06, + "loss": 0.215, + "step": 21783, + "teacher_loss": 0.19446727633476257 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 1.0768918991088867, + "learning_rate": 5.5644309796361525e-06, + "loss": 0.2594, + "step": 21784, + "teacher_loss": 0.1685260683298111 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.38015520572662354, + "learning_rate": 5.5626654759303085e-06, + "loss": 0.3141, + "step": 21785, + "teacher_loss": 0.30670660734176636 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.23343563079833984, + "learning_rate": 5.560900188594257e-06, + "loss": 0.1825, + "step": 21786, + "teacher_loss": 0.1767873615026474 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.27691322565078735, + "learning_rate": 5.559135117668458e-06, + "loss": 0.1749, + "step": 21787, + "teacher_loss": 0.1635380983352661 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.36531662940979004, + "learning_rate": 5.557370263193392e-06, + "loss": 0.2043, + "step": 21788, + "teacher_loss": 0.18639256060123444 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.45378464460372925, + "learning_rate": 5.555605625209509e-06, + "loss": 0.2425, + "step": 21789, + "teacher_loss": 0.21904532611370087 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.3090687394142151, + "learning_rate": 5.553841203757273e-06, + "loss": 0.1944, + "step": 21790, + "teacher_loss": 0.18169020116329193 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.2517775893211365, + "learning_rate": 5.5520769988771445e-06, + "loss": 0.1977, + "step": 21791, + "teacher_loss": 0.19167311489582062 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.283770889043808, + "learning_rate": 5.550313010609557e-06, + "loss": 0.2648, + "step": 21792, + "teacher_loss": 0.2626928389072418 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.30221399664878845, + "learning_rate": 5.54854923899496e-06, + "loss": 0.2385, + "step": 21793, + "teacher_loss": 0.23138776421546936 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.6881433725357056, + "learning_rate": 5.546785684073798e-06, + "loss": 0.2157, + "step": 21794, + "teacher_loss": 0.1632264256477356 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.2312692105770111, + "learning_rate": 5.545022345886493e-06, + "loss": 0.3013, + "step": 21795, + "teacher_loss": 0.30905210971832275 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.3266114294528961, + "learning_rate": 5.543259224473477e-06, + "loss": 0.2714, + "step": 21796, + "teacher_loss": 0.26523900032043457 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.39791879057884216, + "learning_rate": 5.541496319875181e-06, + "loss": 0.2085, + "step": 21797, + "teacher_loss": 0.18747058510780334 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.33680111169815063, + "learning_rate": 5.539733632132012e-06, + "loss": 0.2262, + "step": 21798, + "teacher_loss": 0.21385599672794342 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.22774305939674377, + "learning_rate": 5.5379711612843854e-06, + "loss": 0.1904, + "step": 21799, + "teacher_loss": 0.18624094128608704 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.29141971468925476, + "learning_rate": 5.536208907372717e-06, + "loss": 0.1722, + "step": 21800, + "teacher_loss": 0.1589074581861496 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.3927082419395447, + "learning_rate": 5.534446870437404e-06, + "loss": 0.2846, + "step": 21801, + "teacher_loss": 0.2725604772567749 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.099654421210289, + "learning_rate": 5.532685050518842e-06, + "loss": 0.1741, + "step": 21802, + "teacher_loss": 0.1823582649230957 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.3140486180782318, + "learning_rate": 5.530923447657425e-06, + "loss": 0.2063, + "step": 21803, + "teacher_loss": 0.1943490207195282 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.537128746509552, + "learning_rate": 5.529162061893551e-06, + "loss": 0.2741, + "step": 21804, + "teacher_loss": 0.2448616623878479 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.4055987298488617, + "learning_rate": 5.527400893267588e-06, + "loss": 0.2032, + "step": 21805, + "teacher_loss": 0.18075653910636902 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.7366605997085571, + "learning_rate": 5.525639941819924e-06, + "loss": 0.287, + "step": 21806, + "teacher_loss": 0.23702384531497955 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.4150068163871765, + "learning_rate": 5.5238792075909345e-06, + "loss": 0.3218, + "step": 21807, + "teacher_loss": 0.31139639019966125 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.2262985110282898, + "learning_rate": 5.52211869062098e-06, + "loss": 0.2492, + "step": 21808, + "teacher_loss": 0.25169500708580017 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.6309796571731567, + "learning_rate": 5.520358390950427e-06, + "loss": 0.2852, + "step": 21809, + "teacher_loss": 0.24677090346813202 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.2678382992744446, + "learning_rate": 5.51859830861964e-06, + "loss": 0.1759, + "step": 21810, + "teacher_loss": 0.1657118797302246 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.33942747116088867, + "learning_rate": 5.516838443668967e-06, + "loss": 0.1828, + "step": 21811, + "teacher_loss": 0.16539278626441956 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.44235509634017944, + "learning_rate": 5.515078796138746e-06, + "loss": 0.2174, + "step": 21812, + "teacher_loss": 0.19235381484031677 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.5736873149871826, + "learning_rate": 5.513319366069343e-06, + "loss": 0.2513, + "step": 21813, + "teacher_loss": 0.21543771028518677 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.8301466703414917, + "learning_rate": 5.511560153501083e-06, + "loss": 0.3284, + "step": 21814, + "teacher_loss": 0.27264589071273804 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.5817592144012451, + "learning_rate": 5.50980115847429e-06, + "loss": 0.2379, + "step": 21815, + "teacher_loss": 0.19968563318252563 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.3100306987762451, + "learning_rate": 5.508042381029314e-06, + "loss": 0.2265, + "step": 21816, + "teacher_loss": 0.21723297238349915 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.4455883204936981, + "learning_rate": 5.506283821206468e-06, + "loss": 0.2349, + "step": 21817, + "teacher_loss": 0.21149076521396637 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.23605969548225403, + "learning_rate": 5.504525479046064e-06, + "loss": 0.2005, + "step": 21818, + "teacher_loss": 0.19651061296463013 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.6319211721420288, + "learning_rate": 5.502767354588425e-06, + "loss": 0.5765, + "step": 21819, + "teacher_loss": 0.570328950881958 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.7493718862533569, + "learning_rate": 5.501009447873859e-06, + "loss": 0.4379, + "step": 21820, + "teacher_loss": 0.40327948331832886 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.4499378800392151, + "learning_rate": 5.499251758942662e-06, + "loss": 0.3207, + "step": 21821, + "teacher_loss": 0.3062889575958252 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.7097110748291016, + "learning_rate": 5.497494287835138e-06, + "loss": 0.2722, + "step": 21822, + "teacher_loss": 0.2235608994960785 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.33150917291641235, + "learning_rate": 5.495737034591584e-06, + "loss": 0.1943, + "step": 21823, + "teacher_loss": 0.1790352314710617 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.35428741574287415, + "learning_rate": 5.4939799992522864e-06, + "loss": 0.1636, + "step": 21824, + "teacher_loss": 0.1423853486776352 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.5675662755966187, + "learning_rate": 5.492223181857515e-06, + "loss": 0.2165, + "step": 21825, + "teacher_loss": 0.1774882972240448 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.591162383556366, + "learning_rate": 5.490466582447572e-06, + "loss": 0.2788, + "step": 21826, + "teacher_loss": 0.2440471202135086 + }, + { + "compression_loss": 0.0, + "epoch": 3.94, + "label_loss": 0.38896623253822327, + "learning_rate": 5.488710201062717e-06, + "loss": 0.2564, + "step": 21827, + "teacher_loss": 0.24171802401542664 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.2594240605831146, + "learning_rate": 5.486954037743218e-06, + "loss": 0.1649, + "step": 21828, + "teacher_loss": 0.15434449911117554 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.5492939949035645, + "learning_rate": 5.48519809252934e-06, + "loss": 0.254, + "step": 21829, + "teacher_loss": 0.2211982011795044 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.40110912919044495, + "learning_rate": 5.483442365461349e-06, + "loss": 0.2413, + "step": 21830, + "teacher_loss": 0.22355502843856812 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.8902040123939514, + "learning_rate": 5.481686856579487e-06, + "loss": 0.2806, + "step": 21831, + "teacher_loss": 0.2128480076789856 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.5316786766052246, + "learning_rate": 5.479931565924007e-06, + "loss": 0.314, + "step": 21832, + "teacher_loss": 0.2898046374320984 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.056444279849529266, + "learning_rate": 5.478176493535159e-06, + "loss": 0.138, + "step": 21833, + "teacher_loss": 0.14707374572753906 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.2887080907821655, + "learning_rate": 5.476421639453172e-06, + "loss": 0.1842, + "step": 21834, + "teacher_loss": 0.1726272702217102 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.47937312722206116, + "learning_rate": 5.474667003718282e-06, + "loss": 0.2876, + "step": 21835, + "teacher_loss": 0.2663082480430603 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.50431227684021, + "learning_rate": 5.472912586370725e-06, + "loss": 0.2268, + "step": 21836, + "teacher_loss": 0.19596019387245178 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.3487039804458618, + "learning_rate": 5.471158387450716e-06, + "loss": 0.175, + "step": 21837, + "teacher_loss": 0.15565647184848785 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.49248382449150085, + "learning_rate": 5.469404406998478e-06, + "loss": 0.3081, + "step": 21838, + "teacher_loss": 0.2875773310661316 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.473918616771698, + "learning_rate": 5.467650645054218e-06, + "loss": 0.368, + "step": 21839, + "teacher_loss": 0.35623180866241455 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.23867295682430267, + "learning_rate": 5.4658971016581505e-06, + "loss": 0.1834, + "step": 21840, + "teacher_loss": 0.17722952365875244 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.8058426976203918, + "learning_rate": 5.464143776850483e-06, + "loss": 0.4407, + "step": 21841, + "teacher_loss": 0.40007829666137695 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.35105955600738525, + "learning_rate": 5.462390670671403e-06, + "loss": 0.198, + "step": 21842, + "teacher_loss": 0.1809605211019516 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.3181931674480438, + "learning_rate": 5.4606377831611135e-06, + "loss": 0.2697, + "step": 21843, + "teacher_loss": 0.26432764530181885 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.30017197132110596, + "learning_rate": 5.458885114359795e-06, + "loss": 0.2011, + "step": 21844, + "teacher_loss": 0.19004732370376587 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.44406893849372864, + "learning_rate": 5.457132664307636e-06, + "loss": 0.2961, + "step": 21845, + "teacher_loss": 0.27965036034584045 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.5293300747871399, + "learning_rate": 5.455380433044819e-06, + "loss": 0.3116, + "step": 21846, + "teacher_loss": 0.28742873668670654 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.2917672395706177, + "learning_rate": 5.453628420611507e-06, + "loss": 0.1952, + "step": 21847, + "teacher_loss": 0.18445372581481934 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.45203158259391785, + "learning_rate": 5.451876627047873e-06, + "loss": 0.2534, + "step": 21848, + "teacher_loss": 0.23138515651226044 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.3581094741821289, + "learning_rate": 5.4501250523940875e-06, + "loss": 0.2194, + "step": 21849, + "teacher_loss": 0.20397770404815674 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.2336127758026123, + "learning_rate": 5.448373696690297e-06, + "loss": 0.1412, + "step": 21850, + "teacher_loss": 0.13092857599258423 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 1.235878825187683, + "learning_rate": 5.446622559976665e-06, + "loss": 0.3322, + "step": 21851, + "teacher_loss": 0.23183083534240723 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.4069324731826782, + "learning_rate": 5.44487164229333e-06, + "loss": 0.1788, + "step": 21852, + "teacher_loss": 0.15341225266456604 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.38151299953460693, + "learning_rate": 5.443120943680441e-06, + "loss": 0.1677, + "step": 21853, + "teacher_loss": 0.14396381378173828 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.8180519342422485, + "learning_rate": 5.441370464178142e-06, + "loss": 0.2228, + "step": 21854, + "teacher_loss": 0.15660548210144043 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.774874210357666, + "learning_rate": 5.439620203826553e-06, + "loss": 0.3775, + "step": 21855, + "teacher_loss": 0.3333788812160492 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.6710628867149353, + "learning_rate": 5.43787016266581e-06, + "loss": 0.2115, + "step": 21856, + "teacher_loss": 0.16038648784160614 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.40120095014572144, + "learning_rate": 5.43612034073604e-06, + "loss": 0.185, + "step": 21857, + "teacher_loss": 0.1610175222158432 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.7244506478309631, + "learning_rate": 5.434370738077352e-06, + "loss": 0.3165, + "step": 21858, + "teacher_loss": 0.2711865305900574 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.6187089085578918, + "learning_rate": 5.432621354729863e-06, + "loss": 0.2043, + "step": 21859, + "teacher_loss": 0.15826475620269775 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.2565605044364929, + "learning_rate": 5.430872190733688e-06, + "loss": 0.2408, + "step": 21860, + "teacher_loss": 0.23909947276115417 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.2647855877876282, + "learning_rate": 5.4291232461289244e-06, + "loss": 0.1918, + "step": 21861, + "teacher_loss": 0.1836376041173935 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.24888500571250916, + "learning_rate": 5.427374520955659e-06, + "loss": 0.1442, + "step": 21862, + "teacher_loss": 0.13257496058940887 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.5142412781715393, + "learning_rate": 5.425626015254004e-06, + "loss": 0.2939, + "step": 21863, + "teacher_loss": 0.26938581466674805 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.32193291187286377, + "learning_rate": 5.4238777290640415e-06, + "loss": 0.2117, + "step": 21864, + "teacher_loss": 0.19943425059318542 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.47531965374946594, + "learning_rate": 5.422129662425845e-06, + "loss": 0.2375, + "step": 21865, + "teacher_loss": 0.2110653817653656 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 1.0679867267608643, + "learning_rate": 5.420381815379501e-06, + "loss": 0.4126, + "step": 21866, + "teacher_loss": 0.33975234627723694 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.22654172778129578, + "learning_rate": 5.4186341879650845e-06, + "loss": 0.154, + "step": 21867, + "teacher_loss": 0.14592206478118896 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.34396350383758545, + "learning_rate": 5.416886780222657e-06, + "loss": 0.3483, + "step": 21868, + "teacher_loss": 0.3487998843193054 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.48915645480155945, + "learning_rate": 5.41513959219228e-06, + "loss": 0.232, + "step": 21869, + "teacher_loss": 0.20348191261291504 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.409118115901947, + "learning_rate": 5.413392623914023e-06, + "loss": 0.1913, + "step": 21870, + "teacher_loss": 0.1671338975429535 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.7449277639389038, + "learning_rate": 5.411645875427926e-06, + "loss": 0.5844, + "step": 21871, + "teacher_loss": 0.5665807723999023 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.35170722007751465, + "learning_rate": 5.409899346774041e-06, + "loss": 0.1855, + "step": 21872, + "teacher_loss": 0.16700395941734314 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.41726088523864746, + "learning_rate": 5.408153037992416e-06, + "loss": 0.2622, + "step": 21873, + "teacher_loss": 0.24496275186538696 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.1638878881931305, + "learning_rate": 5.406406949123086e-06, + "loss": 0.1534, + "step": 21874, + "teacher_loss": 0.15228670835494995 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.9312049746513367, + "learning_rate": 5.404661080206068e-06, + "loss": 0.3758, + "step": 21875, + "teacher_loss": 0.3140539526939392 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.7727344632148743, + "learning_rate": 5.402915431281417e-06, + "loss": 0.2677, + "step": 21876, + "teacher_loss": 0.21159148216247559 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.17131558060646057, + "learning_rate": 5.401170002389139e-06, + "loss": 0.1889, + "step": 21877, + "teacher_loss": 0.19080935418605804 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.3958020508289337, + "learning_rate": 5.39942479356925e-06, + "loss": 0.1737, + "step": 21878, + "teacher_loss": 0.14901262521743774 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.5957311987876892, + "learning_rate": 5.397679804861766e-06, + "loss": 0.2385, + "step": 21879, + "teacher_loss": 0.1987738013267517 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.40834569931030273, + "learning_rate": 5.3959350363067e-06, + "loss": 0.197, + "step": 21880, + "teacher_loss": 0.17347633838653564 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.5880615711212158, + "learning_rate": 5.394190487944044e-06, + "loss": 0.271, + "step": 21881, + "teacher_loss": 0.23574616014957428 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.7812591195106506, + "learning_rate": 5.392446159813802e-06, + "loss": 0.3244, + "step": 21882, + "teacher_loss": 0.27360600233078003 + }, + { + "compression_loss": 0.0, + "epoch": 3.95, + "label_loss": 0.48583924770355225, + "learning_rate": 5.390702051955967e-06, + "loss": 0.1665, + "step": 21883, + "teacher_loss": 0.13096457719802856 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.17131707072257996, + "learning_rate": 5.38895816441052e-06, + "loss": 0.1386, + "step": 21884, + "teacher_loss": 0.13492825627326965 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.5767116546630859, + "learning_rate": 5.387214497217447e-06, + "loss": 0.2577, + "step": 21885, + "teacher_loss": 0.22230495512485504 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 1.3411751985549927, + "learning_rate": 5.385471050416731e-06, + "loss": 0.3623, + "step": 21886, + "teacher_loss": 0.25353020429611206 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.36906176805496216, + "learning_rate": 5.383727824048337e-06, + "loss": 0.3784, + "step": 21887, + "teacher_loss": 0.3794023394584656 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.49249595403671265, + "learning_rate": 5.381984818152223e-06, + "loss": 0.2447, + "step": 21888, + "teacher_loss": 0.21719059348106384 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.10936786234378815, + "learning_rate": 5.38024203276837e-06, + "loss": 0.1409, + "step": 21889, + "teacher_loss": 0.14434897899627686 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.11709539592266083, + "learning_rate": 5.378499467936725e-06, + "loss": 0.1705, + "step": 21890, + "teacher_loss": 0.176442489027977 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.5083616375923157, + "learning_rate": 5.376757123697238e-06, + "loss": 0.2001, + "step": 21891, + "teacher_loss": 0.16587528586387634 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.22735896706581116, + "learning_rate": 5.375015000089856e-06, + "loss": 0.1575, + "step": 21892, + "teacher_loss": 0.14976409077644348 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.3107032775878906, + "learning_rate": 5.37327309715453e-06, + "loss": 0.1738, + "step": 21893, + "teacher_loss": 0.15860790014266968 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.5569702386856079, + "learning_rate": 5.371531414931183e-06, + "loss": 0.2272, + "step": 21894, + "teacher_loss": 0.1905101090669632 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.24172909557819366, + "learning_rate": 5.369789953459752e-06, + "loss": 0.1724, + "step": 21895, + "teacher_loss": 0.16465112566947937 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.6556931734085083, + "learning_rate": 5.368048712780171e-06, + "loss": 0.2669, + "step": 21896, + "teacher_loss": 0.2237006574869156 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.40778079628944397, + "learning_rate": 5.3663076929323505e-06, + "loss": 0.1996, + "step": 21897, + "teacher_loss": 0.1764843463897705 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 1.3197052478790283, + "learning_rate": 5.36456689395621e-06, + "loss": 0.5313, + "step": 21898, + "teacher_loss": 0.44372835755348206 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.2644239068031311, + "learning_rate": 5.362826315891669e-06, + "loss": 0.1717, + "step": 21899, + "teacher_loss": 0.16143682599067688 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.38508686423301697, + "learning_rate": 5.361085958778619e-06, + "loss": 0.2071, + "step": 21900, + "teacher_loss": 0.1873604953289032 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.19162605702877045, + "learning_rate": 5.359345822656975e-06, + "loss": 0.1814, + "step": 21901, + "teacher_loss": 0.1802879273891449 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.554720938205719, + "learning_rate": 5.357605907566623e-06, + "loss": 0.427, + "step": 21902, + "teacher_loss": 0.4128074645996094 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.3988930583000183, + "learning_rate": 5.3558662135474586e-06, + "loss": 0.2117, + "step": 21903, + "teacher_loss": 0.19088244438171387 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.3039918839931488, + "learning_rate": 5.35412674063937e-06, + "loss": 0.1842, + "step": 21904, + "teacher_loss": 0.1708521842956543 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.8364557027816772, + "learning_rate": 5.352387488882232e-06, + "loss": 0.2954, + "step": 21905, + "teacher_loss": 0.2352619469165802 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.5868608951568604, + "learning_rate": 5.350648458315929e-06, + "loss": 0.2448, + "step": 21906, + "teacher_loss": 0.2068420797586441 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.40496188402175903, + "learning_rate": 5.348909648980321e-06, + "loss": 0.1858, + "step": 21907, + "teacher_loss": 0.16146139800548553 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.4571641683578491, + "learning_rate": 5.347171060915278e-06, + "loss": 0.2853, + "step": 21908, + "teacher_loss": 0.2662588953971863 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.42678797245025635, + "learning_rate": 5.345432694160669e-06, + "loss": 0.2124, + "step": 21909, + "teacher_loss": 0.18859143555164337 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.22702844440937042, + "learning_rate": 5.343694548756338e-06, + "loss": 0.1918, + "step": 21910, + "teacher_loss": 0.18784919381141663 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.5872438549995422, + "learning_rate": 5.341956624742142e-06, + "loss": 0.2541, + "step": 21911, + "teacher_loss": 0.2170286327600479 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.3950216770172119, + "learning_rate": 5.340218922157922e-06, + "loss": 0.2017, + "step": 21912, + "teacher_loss": 0.18019825220108032 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.4140271544456482, + "learning_rate": 5.33848144104352e-06, + "loss": 0.1925, + "step": 21913, + "teacher_loss": 0.16784211993217468 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.2768034040927887, + "learning_rate": 5.336744181438776e-06, + "loss": 0.1862, + "step": 21914, + "teacher_loss": 0.17618080973625183 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.885356068611145, + "learning_rate": 5.335007143383512e-06, + "loss": 0.43, + "step": 21915, + "teacher_loss": 0.37940162420272827 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.4397169351577759, + "learning_rate": 5.333270326917555e-06, + "loss": 0.2712, + "step": 21916, + "teacher_loss": 0.25245797634124756 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.3569839596748352, + "learning_rate": 5.331533732080735e-06, + "loss": 0.2081, + "step": 21917, + "teacher_loss": 0.1915380209684372 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.3570977449417114, + "learning_rate": 5.329797358912851e-06, + "loss": 0.2494, + "step": 21918, + "teacher_loss": 0.23746784031391144 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.4444868564605713, + "learning_rate": 5.328061207453724e-06, + "loss": 0.2691, + "step": 21919, + "teacher_loss": 0.2495938092470169 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.24075017869472504, + "learning_rate": 5.326325277743157e-06, + "loss": 0.1704, + "step": 21920, + "teacher_loss": 0.1625375747680664 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.5903390645980835, + "learning_rate": 5.324589569820945e-06, + "loss": 0.3088, + "step": 21921, + "teacher_loss": 0.27756839990615845 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.24058055877685547, + "learning_rate": 5.3228540837268854e-06, + "loss": 0.1737, + "step": 21922, + "teacher_loss": 0.16629287600517273 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.6437263488769531, + "learning_rate": 5.321118819500774e-06, + "loss": 0.1981, + "step": 21923, + "teacher_loss": 0.14857585728168488 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.5505216717720032, + "learning_rate": 5.319383777182389e-06, + "loss": 0.3062, + "step": 21924, + "teacher_loss": 0.2790648937225342 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.7036067247390747, + "learning_rate": 5.3176489568115e-06, + "loss": 0.2472, + "step": 21925, + "teacher_loss": 0.19644492864608765 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.4073464870452881, + "learning_rate": 5.3159143584279e-06, + "loss": 0.2291, + "step": 21926, + "teacher_loss": 0.20932528376579285 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.24316957592964172, + "learning_rate": 5.314179982071351e-06, + "loss": 0.1628, + "step": 21927, + "teacher_loss": 0.15388178825378418 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.24487102031707764, + "learning_rate": 5.312445827781609e-06, + "loss": 0.2651, + "step": 21928, + "teacher_loss": 0.2673344910144806 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.16839244961738586, + "learning_rate": 5.310711895598439e-06, + "loss": 0.2103, + "step": 21929, + "teacher_loss": 0.2149975597858429 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.2728954553604126, + "learning_rate": 5.308978185561603e-06, + "loss": 0.239, + "step": 21930, + "teacher_loss": 0.23524220287799835 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.17911314964294434, + "learning_rate": 5.307244697710834e-06, + "loss": 0.1567, + "step": 21931, + "teacher_loss": 0.1542629599571228 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.7517884969711304, + "learning_rate": 5.305511432085885e-06, + "loss": 0.2734, + "step": 21932, + "teacher_loss": 0.2202303111553192 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.7069342732429504, + "learning_rate": 5.303778388726498e-06, + "loss": 0.2402, + "step": 21933, + "teacher_loss": 0.18835780024528503 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.21009202301502228, + "learning_rate": 5.302045567672403e-06, + "loss": 0.2139, + "step": 21934, + "teacher_loss": 0.21434888243675232 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.6228509545326233, + "learning_rate": 5.3003129689633165e-06, + "loss": 0.3394, + "step": 21935, + "teacher_loss": 0.30785971879959106 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.5184410214424133, + "learning_rate": 5.298580592638982e-06, + "loss": 0.2127, + "step": 21936, + "teacher_loss": 0.17874939739704132 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.37738293409347534, + "learning_rate": 5.296848438739108e-06, + "loss": 0.2038, + "step": 21937, + "teacher_loss": 0.18446308374404907 + }, + { + "compression_loss": 0.0, + "epoch": 3.96, + "label_loss": 0.16322796046733856, + "learning_rate": 5.295116507303398e-06, + "loss": 0.1947, + "step": 21938, + "teacher_loss": 0.19823195040225983 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.5134272575378418, + "learning_rate": 5.29338479837158e-06, + "loss": 0.3674, + "step": 21939, + "teacher_loss": 0.3511603772640228 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.7748701572418213, + "learning_rate": 5.291653311983345e-06, + "loss": 0.3179, + "step": 21940, + "teacher_loss": 0.2671516239643097 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.2868606448173523, + "learning_rate": 5.28992204817839e-06, + "loss": 0.1578, + "step": 21941, + "teacher_loss": 0.14349418878555298 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.38682180643081665, + "learning_rate": 5.288191006996408e-06, + "loss": 0.23, + "step": 21942, + "teacher_loss": 0.2125495821237564 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.5617120862007141, + "learning_rate": 5.286460188477096e-06, + "loss": 0.2656, + "step": 21943, + "teacher_loss": 0.2326863557100296 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.11406774073839188, + "learning_rate": 5.284729592660122e-06, + "loss": 0.1522, + "step": 21944, + "teacher_loss": 0.15642790496349335 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.1886179894208908, + "learning_rate": 5.282999219585172e-06, + "loss": 0.1552, + "step": 21945, + "teacher_loss": 0.15147164463996887 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.5381919145584106, + "learning_rate": 5.28126906929192e-06, + "loss": 0.2979, + "step": 21946, + "teacher_loss": 0.2712230682373047 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.2647937536239624, + "learning_rate": 5.279539141820032e-06, + "loss": 0.1907, + "step": 21947, + "teacher_loss": 0.1824457049369812 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.4587059020996094, + "learning_rate": 5.277809437209156e-06, + "loss": 0.2646, + "step": 21948, + "teacher_loss": 0.2430022805929184 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.26791971921920776, + "learning_rate": 5.276079955498974e-06, + "loss": 0.1901, + "step": 21949, + "teacher_loss": 0.18147799372673035 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.37886765599250793, + "learning_rate": 5.274350696729122e-06, + "loss": 0.1962, + "step": 21950, + "teacher_loss": 0.17591789364814758 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.38569360971450806, + "learning_rate": 5.2726216609392455e-06, + "loss": 0.2594, + "step": 21951, + "teacher_loss": 0.24531909823417664 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.4861651062965393, + "learning_rate": 5.270892848168989e-06, + "loss": 0.2013, + "step": 21952, + "teacher_loss": 0.16964855790138245 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.21778753399848938, + "learning_rate": 5.269164258457997e-06, + "loss": 0.1992, + "step": 21953, + "teacher_loss": 0.19712093472480774 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.4101647734642029, + "learning_rate": 5.267435891845887e-06, + "loss": 0.2353, + "step": 21954, + "teacher_loss": 0.21583417057991028 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.5885810852050781, + "learning_rate": 5.265707748372295e-06, + "loss": 0.2264, + "step": 21955, + "teacher_loss": 0.1862037181854248 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.44949185848236084, + "learning_rate": 5.263979828076843e-06, + "loss": 0.2402, + "step": 21956, + "teacher_loss": 0.2169126570224762 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.20756162703037262, + "learning_rate": 5.262252130999137e-06, + "loss": 0.1484, + "step": 21957, + "teacher_loss": 0.14178110659122467 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.15677033364772797, + "learning_rate": 5.260524657178796e-06, + "loss": 0.1709, + "step": 21958, + "teacher_loss": 0.17251485586166382 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.5402560234069824, + "learning_rate": 5.258797406655429e-06, + "loss": 0.223, + "step": 21959, + "teacher_loss": 0.18779020011425018 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.1859736144542694, + "learning_rate": 5.257070379468627e-06, + "loss": 0.1555, + "step": 21960, + "teacher_loss": 0.15209782123565674 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.31697842478752136, + "learning_rate": 5.255343575657995e-06, + "loss": 0.1817, + "step": 21961, + "teacher_loss": 0.16671551764011383 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.48178887367248535, + "learning_rate": 5.253616995263114e-06, + "loss": 0.2626, + "step": 21962, + "teacher_loss": 0.23824243247509003 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.18887396156787872, + "learning_rate": 5.251890638323573e-06, + "loss": 0.1713, + "step": 21963, + "teacher_loss": 0.1693476140499115 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.36619412899017334, + "learning_rate": 5.25016450487896e-06, + "loss": 0.2081, + "step": 21964, + "teacher_loss": 0.19056178629398346 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 1.170992374420166, + "learning_rate": 5.248438594968837e-06, + "loss": 0.3506, + "step": 21965, + "teacher_loss": 0.2594633102416992 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.2523373067378998, + "learning_rate": 5.246712908632781e-06, + "loss": 0.2452, + "step": 21966, + "teacher_loss": 0.2443729043006897 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.7331432700157166, + "learning_rate": 5.244987445910361e-06, + "loss": 0.2292, + "step": 21967, + "teacher_loss": 0.1732456088066101 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.5944974422454834, + "learning_rate": 5.243262206841127e-06, + "loss": 0.1933, + "step": 21968, + "teacher_loss": 0.14867115020751953 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.512334406375885, + "learning_rate": 5.241537191464644e-06, + "loss": 0.1948, + "step": 21969, + "teacher_loss": 0.15955850481987 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.4872363805770874, + "learning_rate": 5.23981239982045e-06, + "loss": 0.2486, + "step": 21970, + "teacher_loss": 0.22211408615112305 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.30077463388442993, + "learning_rate": 5.2380878319480944e-06, + "loss": 0.2451, + "step": 21971, + "teacher_loss": 0.2389182448387146 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.4363465905189514, + "learning_rate": 5.236363487887123e-06, + "loss": 0.2195, + "step": 21972, + "teacher_loss": 0.19537557661533356 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.7902982234954834, + "learning_rate": 5.234639367677059e-06, + "loss": 0.5048, + "step": 21973, + "teacher_loss": 0.4730609059333801 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.23141071200370789, + "learning_rate": 5.232915471357441e-06, + "loss": 0.1557, + "step": 21974, + "teacher_loss": 0.14732295274734497 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.485482782125473, + "learning_rate": 5.2311917989677835e-06, + "loss": 0.2487, + "step": 21975, + "teacher_loss": 0.22240306437015533 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.8187615275382996, + "learning_rate": 5.229468350547608e-06, + "loss": 0.2183, + "step": 21976, + "teacher_loss": 0.15154016017913818 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.2518799602985382, + "learning_rate": 5.227745126136436e-06, + "loss": 0.1736, + "step": 21977, + "teacher_loss": 0.16494855284690857 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.3740791082382202, + "learning_rate": 5.2260221257737656e-06, + "loss": 0.1892, + "step": 21978, + "teacher_loss": 0.16861701011657715 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.3134530186653137, + "learning_rate": 5.224299349499102e-06, + "loss": 0.1815, + "step": 21979, + "teacher_loss": 0.16689112782478333 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.13086268305778503, + "learning_rate": 5.222576797351951e-06, + "loss": 0.1597, + "step": 21980, + "teacher_loss": 0.1629471778869629 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.44103682041168213, + "learning_rate": 5.220854469371794e-06, + "loss": 0.184, + "step": 21981, + "teacher_loss": 0.15545111894607544 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.305397629737854, + "learning_rate": 5.219132365598126e-06, + "loss": 0.1847, + "step": 21982, + "teacher_loss": 0.17123432457447052 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.4067939519882202, + "learning_rate": 5.217410486070433e-06, + "loss": 0.2329, + "step": 21983, + "teacher_loss": 0.21361765265464783 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.7942676544189453, + "learning_rate": 5.2156888308281875e-06, + "loss": 0.3093, + "step": 21984, + "teacher_loss": 0.25536322593688965 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.4729829430580139, + "learning_rate": 5.213967399910852e-06, + "loss": 0.3221, + "step": 21985, + "teacher_loss": 0.30538833141326904 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.13893572986125946, + "learning_rate": 5.212246193357914e-06, + "loss": 0.2003, + "step": 21986, + "teacher_loss": 0.20710909366607666 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.9185959100723267, + "learning_rate": 5.210525211208826e-06, + "loss": 0.2589, + "step": 21987, + "teacher_loss": 0.18559253215789795 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.505616307258606, + "learning_rate": 5.208804453503034e-06, + "loss": 0.6072, + "step": 21988, + "teacher_loss": 0.618511438369751 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.5615938305854797, + "learning_rate": 5.207083920280011e-06, + "loss": 0.2407, + "step": 21989, + "teacher_loss": 0.2050146907567978 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.20613881945610046, + "learning_rate": 5.205363611579192e-06, + "loss": 0.1974, + "step": 21990, + "teacher_loss": 0.1964205801486969 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.2284737229347229, + "learning_rate": 5.203643527440016e-06, + "loss": 0.3099, + "step": 21991, + "teacher_loss": 0.31893667578697205 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.2649606466293335, + "learning_rate": 5.201923667901921e-06, + "loss": 0.2806, + "step": 21992, + "teacher_loss": 0.282285213470459 + }, + { + "compression_loss": 0.0, + "epoch": 3.97, + "label_loss": 0.49434322118759155, + "learning_rate": 5.200204033004347e-06, + "loss": 0.2238, + "step": 21993, + "teacher_loss": 0.19372862577438354 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.5153838396072388, + "learning_rate": 5.198484622786708e-06, + "loss": 0.2454, + "step": 21994, + "teacher_loss": 0.2153470516204834 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.28129684925079346, + "learning_rate": 5.19676543728843e-06, + "loss": 0.1925, + "step": 21995, + "teacher_loss": 0.18261420726776123 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 1.0850865840911865, + "learning_rate": 5.195046476548932e-06, + "loss": 0.3091, + "step": 21996, + "teacher_loss": 0.2228819727897644 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.4608195424079895, + "learning_rate": 5.193327740607624e-06, + "loss": 0.2134, + "step": 21997, + "teacher_loss": 0.18594929575920105 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.7459148168563843, + "learning_rate": 5.191609229503898e-06, + "loss": 0.4145, + "step": 21998, + "teacher_loss": 0.37763434648513794 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.22278110682964325, + "learning_rate": 5.189890943277175e-06, + "loss": 0.2346, + "step": 21999, + "teacher_loss": 0.2359198033809662 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.7811424732208252, + "learning_rate": 5.188172881966841e-06, + "loss": 0.4405, + "step": 22000, + "teacher_loss": 0.4026644229888916 + }, + { + "epoch": 3.98, + "eval_exact_match": 80.3027436140019, + "eval_f1": 87.79446748314663, + "step": 22000 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.4003264904022217, + "learning_rate": 5.186455045612279e-06, + "loss": 0.1988, + "step": 22001, + "teacher_loss": 0.17646202445030212 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.45877087116241455, + "learning_rate": 5.184737434252882e-06, + "loss": 0.2387, + "step": 22002, + "teacher_loss": 0.2142493575811386 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.3367345333099365, + "learning_rate": 5.183020047928034e-06, + "loss": 0.273, + "step": 22003, + "teacher_loss": 0.26594263315200806 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.5213367938995361, + "learning_rate": 5.181302886677095e-06, + "loss": 0.2577, + "step": 22004, + "teacher_loss": 0.2284557968378067 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.45731914043426514, + "learning_rate": 5.179585950539445e-06, + "loss": 0.2418, + "step": 22005, + "teacher_loss": 0.21788471937179565 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.4329388439655304, + "learning_rate": 5.177869239554452e-06, + "loss": 0.2425, + "step": 22006, + "teacher_loss": 0.22131648659706116 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.34081748127937317, + "learning_rate": 5.176152753761461e-06, + "loss": 0.2359, + "step": 22007, + "teacher_loss": 0.2242031693458557 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.7113511562347412, + "learning_rate": 5.174436493199836e-06, + "loss": 0.2728, + "step": 22008, + "teacher_loss": 0.22410425543785095 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.44916754961013794, + "learning_rate": 5.172720457908929e-06, + "loss": 0.2693, + "step": 22009, + "teacher_loss": 0.24927031993865967 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.25898346304893494, + "learning_rate": 5.171004647928078e-06, + "loss": 0.1851, + "step": 22010, + "teacher_loss": 0.17690569162368774 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.340464323759079, + "learning_rate": 5.169289063296611e-06, + "loss": 0.2026, + "step": 22011, + "teacher_loss": 0.1872323900461197 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.3818777799606323, + "learning_rate": 5.167573704053882e-06, + "loss": 0.2761, + "step": 22012, + "teacher_loss": 0.2643362879753113 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.48050642013549805, + "learning_rate": 5.165858570239208e-06, + "loss": 0.236, + "step": 22013, + "teacher_loss": 0.20881447196006775 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.1785348504781723, + "learning_rate": 5.164143661891909e-06, + "loss": 0.1513, + "step": 22014, + "teacher_loss": 0.1482510268688202 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.6053401231765747, + "learning_rate": 5.162428979051306e-06, + "loss": 0.3338, + "step": 22015, + "teacher_loss": 0.3035826086997986 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.40746402740478516, + "learning_rate": 5.160714521756716e-06, + "loss": 0.2223, + "step": 22016, + "teacher_loss": 0.20174473524093628 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.37482452392578125, + "learning_rate": 5.159000290047437e-06, + "loss": 0.2386, + "step": 22017, + "teacher_loss": 0.22346365451812744 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.3597516417503357, + "learning_rate": 5.157286283962779e-06, + "loss": 0.212, + "step": 22018, + "teacher_loss": 0.19559751451015472 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.5408403277397156, + "learning_rate": 5.15557250354204e-06, + "loss": 0.255, + "step": 22019, + "teacher_loss": 0.223200261592865 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.5866103172302246, + "learning_rate": 5.153858948824504e-06, + "loss": 0.2436, + "step": 22020, + "teacher_loss": 0.20552338659763336 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.25638547539711, + "learning_rate": 5.152145619849461e-06, + "loss": 0.2177, + "step": 22021, + "teacher_loss": 0.21336421370506287 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.4853782057762146, + "learning_rate": 5.1504325166561995e-06, + "loss": 0.2431, + "step": 22022, + "teacher_loss": 0.21622025966644287 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.17334792017936707, + "learning_rate": 5.148719639283985e-06, + "loss": 0.165, + "step": 22023, + "teacher_loss": 0.16411250829696655 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.41954725980758667, + "learning_rate": 5.147006987772097e-06, + "loss": 0.2275, + "step": 22024, + "teacher_loss": 0.20621536672115326 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.4138488173484802, + "learning_rate": 5.145294562159794e-06, + "loss": 0.188, + "step": 22025, + "teacher_loss": 0.16293151676654816 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.1527281105518341, + "learning_rate": 5.143582362486341e-06, + "loss": 0.1607, + "step": 22026, + "teacher_loss": 0.16163088381290436 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.4191634953022003, + "learning_rate": 5.141870388790998e-06, + "loss": 0.2674, + "step": 22027, + "teacher_loss": 0.25057196617126465 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.21376797556877136, + "learning_rate": 5.140158641113005e-06, + "loss": 0.2135, + "step": 22028, + "teacher_loss": 0.21347551047801971 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.6267105340957642, + "learning_rate": 5.138447119491613e-06, + "loss": 0.2759, + "step": 22029, + "teacher_loss": 0.23692211508750916 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.4355524182319641, + "learning_rate": 5.1367358239660685e-06, + "loss": 0.3392, + "step": 22030, + "teacher_loss": 0.32846057415008545 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.32390856742858887, + "learning_rate": 5.135024754575595e-06, + "loss": 0.2357, + "step": 22031, + "teacher_loss": 0.22592945396900177 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.39424145221710205, + "learning_rate": 5.133313911359431e-06, + "loss": 0.2402, + "step": 22032, + "teacher_loss": 0.2231176793575287 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.35485368967056274, + "learning_rate": 5.131603294356793e-06, + "loss": 0.2006, + "step": 22033, + "teacher_loss": 0.1834208220243454 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.7223978042602539, + "learning_rate": 5.129892903606909e-06, + "loss": 0.2442, + "step": 22034, + "teacher_loss": 0.191110759973526 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.2598540782928467, + "learning_rate": 5.1281827391489826e-06, + "loss": 0.2022, + "step": 22035, + "teacher_loss": 0.19583234190940857 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.3204004168510437, + "learning_rate": 5.12647280102223e-06, + "loss": 0.2429, + "step": 22036, + "teacher_loss": 0.23428279161453247 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.7353971004486084, + "learning_rate": 5.12476308926586e-06, + "loss": 0.2717, + "step": 22037, + "teacher_loss": 0.2201557457447052 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.4952617883682251, + "learning_rate": 5.1230536039190575e-06, + "loss": 0.1909, + "step": 22038, + "teacher_loss": 0.1570655107498169 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 1.0159000158309937, + "learning_rate": 5.121344345021025e-06, + "loss": 0.3128, + "step": 22039, + "teacher_loss": 0.2347167432308197 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.3604879379272461, + "learning_rate": 5.119635312610952e-06, + "loss": 0.1822, + "step": 22040, + "teacher_loss": 0.1623912751674652 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.8334888219833374, + "learning_rate": 5.117926506728015e-06, + "loss": 0.2754, + "step": 22041, + "teacher_loss": 0.21336081624031067 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.5891445279121399, + "learning_rate": 5.116217927411394e-06, + "loss": 0.2314, + "step": 22042, + "teacher_loss": 0.19167275726795197 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.330754816532135, + "learning_rate": 5.114509574700269e-06, + "loss": 0.1374, + "step": 22043, + "teacher_loss": 0.11589701473712921 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.4094906449317932, + "learning_rate": 5.112801448633796e-06, + "loss": 0.2296, + "step": 22044, + "teacher_loss": 0.20965151488780975 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.4453967809677124, + "learning_rate": 5.111093549251142e-06, + "loss": 0.1758, + "step": 22045, + "teacher_loss": 0.1458282470703125 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.4276135265827179, + "learning_rate": 5.1093858765914685e-06, + "loss": 0.2277, + "step": 22046, + "teacher_loss": 0.20547285676002502 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.4745435118675232, + "learning_rate": 5.107678430693926e-06, + "loss": 0.1895, + "step": 22047, + "teacher_loss": 0.15780854225158691 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.5829769372940063, + "learning_rate": 5.105971211597645e-06, + "loss": 0.2268, + "step": 22048, + "teacher_loss": 0.18718942999839783 + }, + { + "compression_loss": 0.0, + "epoch": 3.98, + "label_loss": 0.714089035987854, + "learning_rate": 5.104264219341793e-06, + "loss": 0.2145, + "step": 22049, + "teacher_loss": 0.15894025564193726 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.3871484398841858, + "learning_rate": 5.102557453965493e-06, + "loss": 0.2252, + "step": 22050, + "teacher_loss": 0.20721790194511414 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.41557231545448303, + "learning_rate": 5.10085091550787e-06, + "loss": 0.1955, + "step": 22051, + "teacher_loss": 0.17108920216560364 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.35114336013793945, + "learning_rate": 5.0991446040080574e-06, + "loss": 0.3063, + "step": 22052, + "teacher_loss": 0.30128806829452515 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.4830843508243561, + "learning_rate": 5.09743851950518e-06, + "loss": 0.1875, + "step": 22053, + "teacher_loss": 0.15468311309814453 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.34340670704841614, + "learning_rate": 5.095732662038342e-06, + "loss": 0.1934, + "step": 22054, + "teacher_loss": 0.17676490545272827 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.5621516704559326, + "learning_rate": 5.0940270316466616e-06, + "loss": 0.2327, + "step": 22055, + "teacher_loss": 0.19607576727867126 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.47905653715133667, + "learning_rate": 5.092321628369245e-06, + "loss": 0.2119, + "step": 22056, + "teacher_loss": 0.1822408139705658 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.23314689099788666, + "learning_rate": 5.090616452245187e-06, + "loss": 0.1554, + "step": 22057, + "teacher_loss": 0.14675891399383545 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.3631364703178406, + "learning_rate": 5.088911503313577e-06, + "loss": 0.1773, + "step": 22058, + "teacher_loss": 0.156688392162323 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.2871093153953552, + "learning_rate": 5.08720678161352e-06, + "loss": 0.1771, + "step": 22059, + "teacher_loss": 0.16484317183494568 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.705685019493103, + "learning_rate": 5.085502287184092e-06, + "loss": 0.2145, + "step": 22060, + "teacher_loss": 0.15992408990859985 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.40642642974853516, + "learning_rate": 5.08379802006436e-06, + "loss": 0.1829, + "step": 22061, + "teacher_loss": 0.1580895632505417 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.6396265029907227, + "learning_rate": 5.0820939802934205e-06, + "loss": 0.225, + "step": 22062, + "teacher_loss": 0.17892920970916748 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.5726832747459412, + "learning_rate": 5.0803901679103284e-06, + "loss": 0.2568, + "step": 22063, + "teacher_loss": 0.2217087745666504 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.6497695446014404, + "learning_rate": 5.078686582954147e-06, + "loss": 0.2674, + "step": 22064, + "teacher_loss": 0.22495609521865845 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.6412872672080994, + "learning_rate": 5.0769832254639355e-06, + "loss": 0.2952, + "step": 22065, + "teacher_loss": 0.25677546858787537 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.5934469103813171, + "learning_rate": 5.075280095478753e-06, + "loss": 0.2043, + "step": 22066, + "teacher_loss": 0.16103440523147583 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.35248565673828125, + "learning_rate": 5.073577193037636e-06, + "loss": 0.2044, + "step": 22067, + "teacher_loss": 0.18800094723701477 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.6676783561706543, + "learning_rate": 5.071874518179635e-06, + "loss": 0.3466, + "step": 22068, + "teacher_loss": 0.31092602014541626 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.43910035490989685, + "learning_rate": 5.0701720709437885e-06, + "loss": 0.351, + "step": 22069, + "teacher_loss": 0.341214656829834 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.10622549057006836, + "learning_rate": 5.0684698513691216e-06, + "loss": 0.1193, + "step": 22070, + "teacher_loss": 0.12073312699794769 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.6832747459411621, + "learning_rate": 5.066767859494663e-06, + "loss": 0.225, + "step": 22071, + "teacher_loss": 0.17412303388118744 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.2978058159351349, + "learning_rate": 5.065066095359442e-06, + "loss": 0.2099, + "step": 22072, + "teacher_loss": 0.2001808136701584 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.30778956413269043, + "learning_rate": 5.0633645590024695e-06, + "loss": 0.1788, + "step": 22073, + "teacher_loss": 0.16451063752174377 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.3444862961769104, + "learning_rate": 5.06166325046275e-06, + "loss": 0.1978, + "step": 22074, + "teacher_loss": 0.1815415620803833 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.4022221863269806, + "learning_rate": 5.059962169779297e-06, + "loss": 0.2447, + "step": 22075, + "teacher_loss": 0.22723780572414398 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.7403604984283447, + "learning_rate": 5.058261316991112e-06, + "loss": 0.2321, + "step": 22076, + "teacher_loss": 0.17568188905715942 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.3576715588569641, + "learning_rate": 5.056560692137186e-06, + "loss": 0.2359, + "step": 22077, + "teacher_loss": 0.22232437133789062 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.48366469144821167, + "learning_rate": 5.05486029525651e-06, + "loss": 0.2332, + "step": 22078, + "teacher_loss": 0.20541802048683167 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.4213287830352783, + "learning_rate": 5.0531601263880755e-06, + "loss": 0.2065, + "step": 22079, + "teacher_loss": 0.18267206847667694 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.4027162790298462, + "learning_rate": 5.051460185570852e-06, + "loss": 0.2436, + "step": 22080, + "teacher_loss": 0.2258841097354889 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.24941398203372955, + "learning_rate": 5.049760472843819e-06, + "loss": 0.2356, + "step": 22081, + "teacher_loss": 0.23404951393604279 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.49006187915802, + "learning_rate": 5.048060988245951e-06, + "loss": 0.2377, + "step": 22082, + "teacher_loss": 0.20965850353240967 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.6824772357940674, + "learning_rate": 5.046361731816203e-06, + "loss": 0.6974, + "step": 22083, + "teacher_loss": 0.6990044713020325 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.23311832547187805, + "learning_rate": 5.044662703593542e-06, + "loss": 0.1614, + "step": 22084, + "teacher_loss": 0.1534784734249115 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.3675001561641693, + "learning_rate": 5.0429639036169134e-06, + "loss": 0.2496, + "step": 22085, + "teacher_loss": 0.23654364049434662 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.3297327756881714, + "learning_rate": 5.041265331925269e-06, + "loss": 0.1691, + "step": 22086, + "teacher_loss": 0.15125374495983124 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.2938859462738037, + "learning_rate": 5.039566988557557e-06, + "loss": 0.212, + "step": 22087, + "teacher_loss": 0.2029523253440857 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.324543297290802, + "learning_rate": 5.037868873552708e-06, + "loss": 0.2533, + "step": 22088, + "teacher_loss": 0.24543911218643188 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.47561901807785034, + "learning_rate": 5.036170986949656e-06, + "loss": 0.3514, + "step": 22089, + "teacher_loss": 0.3375966548919678 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.6222383975982666, + "learning_rate": 5.034473328787337e-06, + "loss": 0.296, + "step": 22090, + "teacher_loss": 0.25972336530685425 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.5484682321548462, + "learning_rate": 5.03277589910466e-06, + "loss": 0.3056, + "step": 22091, + "teacher_loss": 0.2786676287651062 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.1844518780708313, + "learning_rate": 5.031078697940548e-06, + "loss": 0.1513, + "step": 22092, + "teacher_loss": 0.14764569699764252 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.6276397705078125, + "learning_rate": 5.029381725333918e-06, + "loss": 0.2538, + "step": 22093, + "teacher_loss": 0.21228352189064026 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.5493172407150269, + "learning_rate": 5.027684981323668e-06, + "loss": 0.253, + "step": 22094, + "teacher_loss": 0.22011929750442505 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.30760014057159424, + "learning_rate": 5.025988465948707e-06, + "loss": 0.1746, + "step": 22095, + "teacher_loss": 0.1598581224679947 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.39357078075408936, + "learning_rate": 5.024292179247921e-06, + "loss": 0.2362, + "step": 22096, + "teacher_loss": 0.2187683880329132 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.49585866928100586, + "learning_rate": 5.0225961212602125e-06, + "loss": 0.209, + "step": 22097, + "teacher_loss": 0.1771656572818756 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.6238361597061157, + "learning_rate": 5.020900292024456e-06, + "loss": 0.2343, + "step": 22098, + "teacher_loss": 0.19098469614982605 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.414753794670105, + "learning_rate": 5.019204691579538e-06, + "loss": 0.4912, + "step": 22099, + "teacher_loss": 0.4996855854988098 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.4031832218170166, + "learning_rate": 5.017509319964335e-06, + "loss": 0.3217, + "step": 22100, + "teacher_loss": 0.3126044273376465 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.5616793036460876, + "learning_rate": 5.0158141772177124e-06, + "loss": 0.2774, + "step": 22101, + "teacher_loss": 0.24584342539310455 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.3253864049911499, + "learning_rate": 5.014119263378534e-06, + "loss": 0.2901, + "step": 22102, + "teacher_loss": 0.2861271798610687 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.7458573579788208, + "learning_rate": 5.012424578485667e-06, + "loss": 0.2807, + "step": 22103, + "teacher_loss": 0.2290249466896057 + }, + { + "compression_loss": 0.0, + "epoch": 3.99, + "label_loss": 0.5150573253631592, + "learning_rate": 5.010730122577956e-06, + "loss": 0.213, + "step": 22104, + "teacher_loss": 0.17945709824562073 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.48404985666275024, + "learning_rate": 5.009035895694254e-06, + "loss": 0.2648, + "step": 22105, + "teacher_loss": 0.24043257534503937 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.40745002031326294, + "learning_rate": 5.007341897873409e-06, + "loss": 0.1966, + "step": 22106, + "teacher_loss": 0.17316630482673645 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.39655566215515137, + "learning_rate": 5.005648129154254e-06, + "loss": 0.1663, + "step": 22107, + "teacher_loss": 0.14074309170246124 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.5433064103126526, + "learning_rate": 5.003954589575614e-06, + "loss": 0.2166, + "step": 22108, + "teacher_loss": 0.1803458333015442 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.2639385461807251, + "learning_rate": 5.002261279176335e-06, + "loss": 0.198, + "step": 22109, + "teacher_loss": 0.19067248702049255 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.44001516699790955, + "learning_rate": 5.0005681979952304e-06, + "loss": 0.2026, + "step": 22110, + "teacher_loss": 0.17626118659973145 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.5811282396316528, + "learning_rate": 4.9988753460711066e-06, + "loss": 0.2311, + "step": 22111, + "teacher_loss": 0.19220075011253357 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.41958028078079224, + "learning_rate": 4.997182723442798e-06, + "loss": 0.1649, + "step": 22112, + "teacher_loss": 0.13656926155090332 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.4574601948261261, + "learning_rate": 4.995490330149097e-06, + "loss": 0.2939, + "step": 22113, + "teacher_loss": 0.275709867477417 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.3676756024360657, + "learning_rate": 4.993798166228804e-06, + "loss": 0.2762, + "step": 22114, + "teacher_loss": 0.2660888433456421 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.8565565347671509, + "learning_rate": 4.992106231720719e-06, + "loss": 0.4078, + "step": 22115, + "teacher_loss": 0.35796058177948 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.21084296703338623, + "learning_rate": 4.990414526663636e-06, + "loss": 0.1818, + "step": 22116, + "teacher_loss": 0.1785779595375061 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.30923327803611755, + "learning_rate": 4.988723051096335e-06, + "loss": 0.2107, + "step": 22117, + "teacher_loss": 0.19969907402992249 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.29472821950912476, + "learning_rate": 4.987031805057598e-06, + "loss": 0.2201, + "step": 22118, + "teacher_loss": 0.21175909042358398 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.2837311029434204, + "learning_rate": 4.985340788586207e-06, + "loss": 0.1603, + "step": 22119, + "teacher_loss": 0.1465539038181305 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.8274620175361633, + "learning_rate": 4.9836500017209254e-06, + "loss": 0.3808, + "step": 22120, + "teacher_loss": 0.3311272859573364 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.9161056876182556, + "learning_rate": 4.981959444500509e-06, + "loss": 0.8852, + "step": 22121, + "teacher_loss": 0.8818002939224243 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.4583396911621094, + "learning_rate": 4.980269116963737e-06, + "loss": 0.265, + "step": 22122, + "teacher_loss": 0.2434912621974945 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.5375685095787048, + "learning_rate": 4.978579019149353e-06, + "loss": 0.2259, + "step": 22123, + "teacher_loss": 0.19121941924095154 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.2717920243740082, + "learning_rate": 4.976889151096101e-06, + "loss": 0.2017, + "step": 22124, + "teacher_loss": 0.19388742744922638 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.1128486692905426, + "learning_rate": 4.97519951284273e-06, + "loss": 0.1835, + "step": 22125, + "teacher_loss": 0.19139564037322998 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.5337270498275757, + "learning_rate": 4.973510104427985e-06, + "loss": 0.23, + "step": 22126, + "teacher_loss": 0.19624271988868713 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.656831681728363, + "learning_rate": 4.9718209258905865e-06, + "loss": 0.2698, + "step": 22127, + "teacher_loss": 0.22680974006652832 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.33048298954963684, + "learning_rate": 4.970131977269267e-06, + "loss": 0.1934, + "step": 22128, + "teacher_loss": 0.1781637966632843 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.13731157779693604, + "learning_rate": 4.968443258602757e-06, + "loss": 0.1766, + "step": 22129, + "teacher_loss": 0.18100687861442566 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.734019935131073, + "learning_rate": 4.96675476992976e-06, + "loss": 0.3203, + "step": 22130, + "teacher_loss": 0.274295836687088 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.12270964682102203, + "learning_rate": 4.965066511288996e-06, + "loss": 0.1341, + "step": 22131, + "teacher_loss": 0.13538146018981934 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.6149711608886719, + "learning_rate": 4.963378482719175e-06, + "loss": 0.2414, + "step": 22132, + "teacher_loss": 0.19994506239891052 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.43186551332473755, + "learning_rate": 4.96169068425899e-06, + "loss": 0.1894, + "step": 22133, + "teacher_loss": 0.16248568892478943 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.44644060730934143, + "learning_rate": 4.960003115947141e-06, + "loss": 0.207, + "step": 22134, + "teacher_loss": 0.18039348721504211 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.5457695722579956, + "learning_rate": 4.958315777822323e-06, + "loss": 0.343, + "step": 22135, + "teacher_loss": 0.32048556208610535 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.20110633969306946, + "learning_rate": 4.956628669923218e-06, + "loss": 0.1596, + "step": 22136, + "teacher_loss": 0.1549452543258667 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.23813612759113312, + "learning_rate": 4.954941792288502e-06, + "loss": 0.2076, + "step": 22137, + "teacher_loss": 0.2042200267314911 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.20629869401454926, + "learning_rate": 4.953255144956853e-06, + "loss": 0.1569, + "step": 22138, + "teacher_loss": 0.1514492928981781 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.29561883211135864, + "learning_rate": 4.951568727966947e-06, + "loss": 0.2015, + "step": 22139, + "teacher_loss": 0.1910032331943512 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.42475250363349915, + "learning_rate": 4.9498825413574385e-06, + "loss": 0.205, + "step": 22140, + "teacher_loss": 0.18053413927555084 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.10949330031871796, + "learning_rate": 4.948196585166991e-06, + "loss": 0.162, + "step": 22141, + "teacher_loss": 0.16781732439994812 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.16784986853599548, + "learning_rate": 4.946510859434265e-06, + "loss": 0.1941, + "step": 22142, + "teacher_loss": 0.1970541775226593 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.5954061150550842, + "learning_rate": 4.944825364197897e-06, + "loss": 0.2177, + "step": 22143, + "teacher_loss": 0.1757291555404663 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.6066509485244751, + "learning_rate": 4.9431400994965375e-06, + "loss": 0.3274, + "step": 22144, + "teacher_loss": 0.29642537236213684 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.19768646359443665, + "learning_rate": 4.941455065368828e-06, + "loss": 0.1734, + "step": 22145, + "teacher_loss": 0.17072495818138123 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.27365177869796753, + "learning_rate": 4.939770261853391e-06, + "loss": 0.1688, + "step": 22146, + "teacher_loss": 0.15717965364456177 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.7069647908210754, + "learning_rate": 4.938085688988865e-06, + "loss": 0.2357, + "step": 22147, + "teacher_loss": 0.18336597084999084 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.2205541431903839, + "learning_rate": 4.936401346813864e-06, + "loss": 0.1608, + "step": 22148, + "teacher_loss": 0.15416598320007324 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.37163227796554565, + "learning_rate": 4.934717235367006e-06, + "loss": 0.2136, + "step": 22149, + "teacher_loss": 0.19604641199111938 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.27454090118408203, + "learning_rate": 4.933033354686909e-06, + "loss": 0.191, + "step": 22150, + "teacher_loss": 0.18172410130500793 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.30184727907180786, + "learning_rate": 4.931349704812171e-06, + "loss": 0.1579, + "step": 22151, + "teacher_loss": 0.14191964268684387 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.322023868560791, + "learning_rate": 4.929666285781396e-06, + "loss": 0.2211, + "step": 22152, + "teacher_loss": 0.20988181233406067 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.2929700016975403, + "learning_rate": 4.927983097633188e-06, + "loss": 0.1634, + "step": 22153, + "teacher_loss": 0.14895889163017273 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.19951452314853668, + "learning_rate": 4.9263001404061225e-06, + "loss": 0.1384, + "step": 22154, + "teacher_loss": 0.13160760700702667 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.3469867706298828, + "learning_rate": 4.924617414138792e-06, + "loss": 0.1929, + "step": 22155, + "teacher_loss": 0.17578881978988647 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.37899789214134216, + "learning_rate": 4.922934918869784e-06, + "loss": 0.2222, + "step": 22156, + "teacher_loss": 0.2047813981771469 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.8178020715713501, + "learning_rate": 4.921252654637664e-06, + "loss": 0.4278, + "step": 22157, + "teacher_loss": 0.38449203968048096 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.418819397687912, + "learning_rate": 4.919570621480997e-06, + "loss": 0.1908, + "step": 22158, + "teacher_loss": 0.16549530625343323 + }, + { + "compression_loss": 0.0, + "epoch": 4.0, + "label_loss": 0.4267660975456238, + "learning_rate": 4.917888819438356e-06, + "loss": 0.2025, + "step": 22159, + "teacher_loss": 0.1775885671377182 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.4184418320655823, + "learning_rate": 4.916207248548299e-06, + "loss": 0.3032, + "step": 22160, + "teacher_loss": 0.2903625965118408 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.4742000699043274, + "learning_rate": 4.914525908849375e-06, + "loss": 0.2153, + "step": 22161, + "teacher_loss": 0.1865180879831314 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.39197659492492676, + "learning_rate": 4.912844800380131e-06, + "loss": 0.1761, + "step": 22162, + "teacher_loss": 0.15207619965076447 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.31735193729400635, + "learning_rate": 4.91116392317912e-06, + "loss": 0.2331, + "step": 22163, + "teacher_loss": 0.2237127721309662 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.6306535005569458, + "learning_rate": 4.909483277284864e-06, + "loss": 0.2714, + "step": 22164, + "teacher_loss": 0.23151788115501404 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.4461095631122589, + "learning_rate": 4.907802862735906e-06, + "loss": 0.181, + "step": 22165, + "teacher_loss": 0.15153281390666962 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.7646471261978149, + "learning_rate": 4.906122679570777e-06, + "loss": 0.3834, + "step": 22166, + "teacher_loss": 0.3410441279411316 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.19427461922168732, + "learning_rate": 4.9044427278279845e-06, + "loss": 0.1484, + "step": 22167, + "teacher_loss": 0.14329418540000916 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.20533442497253418, + "learning_rate": 4.902763007546052e-06, + "loss": 0.1525, + "step": 22168, + "teacher_loss": 0.14658068120479584 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.27081024646759033, + "learning_rate": 4.901083518763497e-06, + "loss": 0.149, + "step": 22169, + "teacher_loss": 0.13545003533363342 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.29067543148994446, + "learning_rate": 4.899404261518819e-06, + "loss": 0.1804, + "step": 22170, + "teacher_loss": 0.16812169551849365 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.13765586912631989, + "learning_rate": 4.897725235850506e-06, + "loss": 0.1739, + "step": 22171, + "teacher_loss": 0.17787493765354156 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.15603819489479065, + "learning_rate": 4.896046441797077e-06, + "loss": 0.1948, + "step": 22172, + "teacher_loss": 0.19908232986927032 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.3383418917655945, + "learning_rate": 4.89436787939701e-06, + "loss": 0.2161, + "step": 22173, + "teacher_loss": 0.20252057909965515 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.15133023262023926, + "learning_rate": 4.892689548688784e-06, + "loss": 0.1487, + "step": 22174, + "teacher_loss": 0.14836657047271729 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.21806451678276062, + "learning_rate": 4.891011449710882e-06, + "loss": 0.165, + "step": 22175, + "teacher_loss": 0.1591041088104248 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 1.5460799932479858, + "learning_rate": 4.889333582501786e-06, + "loss": 0.3248, + "step": 22176, + "teacher_loss": 0.18913854658603668 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.24715489149093628, + "learning_rate": 4.887655947099952e-06, + "loss": 0.2325, + "step": 22177, + "teacher_loss": 0.23091718554496765 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.6276915073394775, + "learning_rate": 4.885978543543848e-06, + "loss": 0.235, + "step": 22178, + "teacher_loss": 0.19139115512371063 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.29744383692741394, + "learning_rate": 4.884301371871938e-06, + "loss": 0.1855, + "step": 22179, + "teacher_loss": 0.17307361960411072 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.5255406498908997, + "learning_rate": 4.882624432122669e-06, + "loss": 0.1783, + "step": 22180, + "teacher_loss": 0.1397274136543274 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.4809519052505493, + "learning_rate": 4.880947724334479e-06, + "loss": 0.1826, + "step": 22181, + "teacher_loss": 0.14943277835845947 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.702721118927002, + "learning_rate": 4.879271248545829e-06, + "loss": 0.2333, + "step": 22182, + "teacher_loss": 0.18114346265792847 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.25440531969070435, + "learning_rate": 4.877595004795145e-06, + "loss": 0.1814, + "step": 22183, + "teacher_loss": 0.17327424883842468 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.3782019019126892, + "learning_rate": 4.8759189931208495e-06, + "loss": 0.2086, + "step": 22184, + "teacher_loss": 0.18980836868286133 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.14903101325035095, + "learning_rate": 4.874243213561389e-06, + "loss": 0.1521, + "step": 22185, + "teacher_loss": 0.15243589878082275 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.330823689699173, + "learning_rate": 4.8725676661551705e-06, + "loss": 0.1907, + "step": 22186, + "teacher_loss": 0.17509052157402039 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.3414389491081238, + "learning_rate": 4.870892350940609e-06, + "loss": 0.1675, + "step": 22187, + "teacher_loss": 0.1481526494026184 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.2518971562385559, + "learning_rate": 4.869217267956116e-06, + "loss": 0.1796, + "step": 22188, + "teacher_loss": 0.17159605026245117 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.30308133363723755, + "learning_rate": 4.867542417240102e-06, + "loss": 0.3065, + "step": 22189, + "teacher_loss": 0.3068299889564514 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.5366024971008301, + "learning_rate": 4.8658677988309585e-06, + "loss": 0.2319, + "step": 22190, + "teacher_loss": 0.19801317155361176 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.2289092242717743, + "learning_rate": 4.864193412767081e-06, + "loss": 0.162, + "step": 22191, + "teacher_loss": 0.15461131930351257 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.28311195969581604, + "learning_rate": 4.862519259086866e-06, + "loss": 0.1996, + "step": 22192, + "teacher_loss": 0.1903626173734665 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.3824400305747986, + "learning_rate": 4.860845337828684e-06, + "loss": 0.2287, + "step": 22193, + "teacher_loss": 0.21161530911922455 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.2799724340438843, + "learning_rate": 4.859171649030922e-06, + "loss": 0.1846, + "step": 22194, + "teacher_loss": 0.1739773005247116 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.28905272483825684, + "learning_rate": 4.857498192731955e-06, + "loss": 0.2345, + "step": 22195, + "teacher_loss": 0.22843128442764282 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 1.1945858001708984, + "learning_rate": 4.855824968970138e-06, + "loss": 0.285, + "step": 22196, + "teacher_loss": 0.18390268087387085 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.3967694640159607, + "learning_rate": 4.8541519777838475e-06, + "loss": 0.2765, + "step": 22197, + "teacher_loss": 0.2631611227989197 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.3093956708908081, + "learning_rate": 4.8524792192114286e-06, + "loss": 0.2273, + "step": 22198, + "teacher_loss": 0.2181403785943985 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.3573721647262573, + "learning_rate": 4.850806693291242e-06, + "loss": 0.1798, + "step": 22199, + "teacher_loss": 0.16004875302314758 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.38605546951293945, + "learning_rate": 4.8491344000616235e-06, + "loss": 0.1749, + "step": 22200, + "teacher_loss": 0.15141837298870087 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.5034204125404358, + "learning_rate": 4.84746233956092e-06, + "loss": 0.2506, + "step": 22201, + "teacher_loss": 0.22250115871429443 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.3418252766132355, + "learning_rate": 4.845790511827472e-06, + "loss": 0.2086, + "step": 22202, + "teacher_loss": 0.19384264945983887 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.4632079005241394, + "learning_rate": 4.844118916899597e-06, + "loss": 0.2187, + "step": 22203, + "teacher_loss": 0.1915285289287567 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.37468206882476807, + "learning_rate": 4.8424475548156274e-06, + "loss": 0.2158, + "step": 22204, + "teacher_loss": 0.19809266924858093 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.5227788686752319, + "learning_rate": 4.840776425613887e-06, + "loss": 0.2362, + "step": 22205, + "teacher_loss": 0.20431622862815857 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.8319765329360962, + "learning_rate": 4.839105529332677e-06, + "loss": 0.27, + "step": 22206, + "teacher_loss": 0.20752760767936707 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.7293442487716675, + "learning_rate": 4.83743486601032e-06, + "loss": 0.2774, + "step": 22207, + "teacher_loss": 0.22717750072479248 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.3652310073375702, + "learning_rate": 4.8357644356851076e-06, + "loss": 0.2014, + "step": 22208, + "teacher_loss": 0.18323367834091187 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.3104920983314514, + "learning_rate": 4.834094238395343e-06, + "loss": 0.2243, + "step": 22209, + "teacher_loss": 0.21474528312683105 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.2556689977645874, + "learning_rate": 4.832424274179321e-06, + "loss": 0.1917, + "step": 22210, + "teacher_loss": 0.18454432487487793 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.509351909160614, + "learning_rate": 4.830754543075324e-06, + "loss": 0.2191, + "step": 22211, + "teacher_loss": 0.18682368099689484 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.732737123966217, + "learning_rate": 4.829085045121636e-06, + "loss": 0.375, + "step": 22212, + "teacher_loss": 0.3352881669998169 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.3403262495994568, + "learning_rate": 4.827415780356539e-06, + "loss": 0.2194, + "step": 22213, + "teacher_loss": 0.20597070455551147 + }, + { + "compression_loss": 0.0, + "epoch": 4.01, + "label_loss": 0.32109522819519043, + "learning_rate": 4.825746748818293e-06, + "loss": 0.4109, + "step": 22214, + "teacher_loss": 0.4208357334136963 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.43766844272613525, + "learning_rate": 4.824077950545171e-06, + "loss": 0.2099, + "step": 22215, + "teacher_loss": 0.18455472588539124 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.3821280002593994, + "learning_rate": 4.822409385575436e-06, + "loss": 0.2337, + "step": 22216, + "teacher_loss": 0.21716630458831787 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.6813108325004578, + "learning_rate": 4.820741053947337e-06, + "loss": 0.2415, + "step": 22217, + "teacher_loss": 0.1926506608724594 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 1.1136844158172607, + "learning_rate": 4.819072955699131e-06, + "loss": 0.7076, + "step": 22218, + "teacher_loss": 0.6625146865844727 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.44034433364868164, + "learning_rate": 4.817405090869053e-06, + "loss": 0.2625, + "step": 22219, + "teacher_loss": 0.2427157163619995 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.3190616965293884, + "learning_rate": 4.815737459495353e-06, + "loss": 0.2022, + "step": 22220, + "teacher_loss": 0.18924956023693085 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.14926780760288239, + "learning_rate": 4.814070061616253e-06, + "loss": 0.1387, + "step": 22221, + "teacher_loss": 0.13756847381591797 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.328657329082489, + "learning_rate": 4.812402897269988e-06, + "loss": 0.2141, + "step": 22222, + "teacher_loss": 0.20142537355422974 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.39032572507858276, + "learning_rate": 4.810735966494784e-06, + "loss": 0.1925, + "step": 22223, + "teacher_loss": 0.1705411672592163 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.29390084743499756, + "learning_rate": 4.809069269328851e-06, + "loss": 0.1994, + "step": 22224, + "teacher_loss": 0.1888597458600998 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.5845149755477905, + "learning_rate": 4.807402805810407e-06, + "loss": 0.2422, + "step": 22225, + "teacher_loss": 0.20420344173908234 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.8635436296463013, + "learning_rate": 4.805736575977661e-06, + "loss": 0.2249, + "step": 22226, + "teacher_loss": 0.15398788452148438 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.14236120879650116, + "learning_rate": 4.804070579868808e-06, + "loss": 0.1922, + "step": 22227, + "teacher_loss": 0.1976958066225052 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.37992843985557556, + "learning_rate": 4.802404817522047e-06, + "loss": 0.2011, + "step": 22228, + "teacher_loss": 0.18126875162124634 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.7497575283050537, + "learning_rate": 4.800739288975575e-06, + "loss": 0.2285, + "step": 22229, + "teacher_loss": 0.17053581774234772 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.4402177929878235, + "learning_rate": 4.799073994267571e-06, + "loss": 0.3145, + "step": 22230, + "teacher_loss": 0.30058297514915466 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.177322119474411, + "learning_rate": 4.797408933436207e-06, + "loss": 0.1582, + "step": 22231, + "teacher_loss": 0.15603359043598175 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.22170838713645935, + "learning_rate": 4.795744106519678e-06, + "loss": 0.1536, + "step": 22232, + "teacher_loss": 0.1460103988647461 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.7143243551254272, + "learning_rate": 4.794079513556141e-06, + "loss": 0.3014, + "step": 22233, + "teacher_loss": 0.2555413842201233 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.5361828804016113, + "learning_rate": 4.792415154583753e-06, + "loss": 0.2426, + "step": 22234, + "teacher_loss": 0.20994088053703308 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.2834022045135498, + "learning_rate": 4.79075102964069e-06, + "loss": 0.144, + "step": 22235, + "teacher_loss": 0.1284627616405487 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 1.0938578844070435, + "learning_rate": 4.789087138765099e-06, + "loss": 0.2609, + "step": 22236, + "teacher_loss": 0.16839095950126648 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.30538052320480347, + "learning_rate": 4.78742348199512e-06, + "loss": 0.2016, + "step": 22237, + "teacher_loss": 0.19008949398994446 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.8319786190986633, + "learning_rate": 4.785760059368902e-06, + "loss": 0.3755, + "step": 22238, + "teacher_loss": 0.32474032044410706 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.36674243211746216, + "learning_rate": 4.784096870924586e-06, + "loss": 0.1629, + "step": 22239, + "teacher_loss": 0.1402583122253418 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.7657642364501953, + "learning_rate": 4.7824339167002954e-06, + "loss": 0.3325, + "step": 22240, + "teacher_loss": 0.28433090448379517 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.13501110672950745, + "learning_rate": 4.7807711967341626e-06, + "loss": 0.1691, + "step": 22241, + "teacher_loss": 0.17286773025989532 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.32190483808517456, + "learning_rate": 4.779108711064311e-06, + "loss": 0.2231, + "step": 22242, + "teacher_loss": 0.21207579970359802 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.3867034316062927, + "learning_rate": 4.7774464597288535e-06, + "loss": 0.1813, + "step": 22243, + "teacher_loss": 0.15851646661758423 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.26163506507873535, + "learning_rate": 4.77578444276589e-06, + "loss": 0.2059, + "step": 22244, + "teacher_loss": 0.19967812299728394 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 1.0899204015731812, + "learning_rate": 4.774122660213543e-06, + "loss": 0.3464, + "step": 22245, + "teacher_loss": 0.2637593746185303 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.2081674039363861, + "learning_rate": 4.772461112109908e-06, + "loss": 0.1595, + "step": 22246, + "teacher_loss": 0.1541392207145691 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.42645394802093506, + "learning_rate": 4.7707997984930685e-06, + "loss": 0.2538, + "step": 22247, + "teacher_loss": 0.23458853363990784 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.2986205816268921, + "learning_rate": 4.769138719401122e-06, + "loss": 0.2081, + "step": 22248, + "teacher_loss": 0.19806770980358124 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.4007059633731842, + "learning_rate": 4.7674778748721565e-06, + "loss": 0.2158, + "step": 22249, + "teacher_loss": 0.1952553540468216 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.666324257850647, + "learning_rate": 4.765817264944239e-06, + "loss": 0.3621, + "step": 22250, + "teacher_loss": 0.32832300662994385 + }, + { + "epoch": 4.02, + "eval_exact_match": 80.6717123935667, + "eval_f1": 87.92336774801645, + "step": 22250 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.6306180953979492, + "learning_rate": 4.764156889655449e-06, + "loss": 0.2465, + "step": 22251, + "teacher_loss": 0.20385517179965973 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.2822083830833435, + "learning_rate": 4.762496749043856e-06, + "loss": 0.1333, + "step": 22252, + "teacher_loss": 0.1167861819267273 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.4688491225242615, + "learning_rate": 4.760836843147515e-06, + "loss": 0.2075, + "step": 22253, + "teacher_loss": 0.1784694939851761 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.5995774865150452, + "learning_rate": 4.759177172004487e-06, + "loss": 0.318, + "step": 22254, + "teacher_loss": 0.28672486543655396 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.24449220299720764, + "learning_rate": 4.7575177356528255e-06, + "loss": 0.1813, + "step": 22255, + "teacher_loss": 0.17430609464645386 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.6079674363136292, + "learning_rate": 4.75585853413057e-06, + "loss": 0.2481, + "step": 22256, + "teacher_loss": 0.20806127786636353 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.4405941963195801, + "learning_rate": 4.754199567475766e-06, + "loss": 0.307, + "step": 22257, + "teacher_loss": 0.2921597957611084 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.28500232100486755, + "learning_rate": 4.752540835726452e-06, + "loss": 0.2252, + "step": 22258, + "teacher_loss": 0.21851998567581177 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.3956871032714844, + "learning_rate": 4.750882338920648e-06, + "loss": 0.1748, + "step": 22259, + "teacher_loss": 0.1503017544746399 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.24632880091667175, + "learning_rate": 4.749224077096388e-06, + "loss": 0.2292, + "step": 22260, + "teacher_loss": 0.22730940580368042 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.9285036325454712, + "learning_rate": 4.747566050291683e-06, + "loss": 0.2933, + "step": 22261, + "teacher_loss": 0.22268739342689514 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.532642126083374, + "learning_rate": 4.745908258544553e-06, + "loss": 0.1827, + "step": 22262, + "teacher_loss": 0.14382728934288025 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.29160061478614807, + "learning_rate": 4.744250701892999e-06, + "loss": 0.231, + "step": 22263, + "teacher_loss": 0.2242424190044403 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.42184627056121826, + "learning_rate": 4.742593380375028e-06, + "loss": 0.2248, + "step": 22264, + "teacher_loss": 0.20291900634765625 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.4828150272369385, + "learning_rate": 4.740936294028643e-06, + "loss": 0.2277, + "step": 22265, + "teacher_loss": 0.1993836760520935 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.6507429480552673, + "learning_rate": 4.739279442891826e-06, + "loss": 0.2558, + "step": 22266, + "teacher_loss": 0.21187695860862732 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.48997190594673157, + "learning_rate": 4.737622827002567e-06, + "loss": 0.199, + "step": 22267, + "teacher_loss": 0.16669908165931702 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.6850756406784058, + "learning_rate": 4.735966446398854e-06, + "loss": 0.2921, + "step": 22268, + "teacher_loss": 0.24838471412658691 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.2358751744031906, + "learning_rate": 4.734310301118652e-06, + "loss": 0.1663, + "step": 22269, + "teacher_loss": 0.15861360728740692 + }, + { + "compression_loss": 0.0, + "epoch": 4.02, + "label_loss": 0.9026320576667786, + "learning_rate": 4.732654391199941e-06, + "loss": 0.3017, + "step": 22270, + "teacher_loss": 0.23489803075790405 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.597624659538269, + "learning_rate": 4.7309987166806775e-06, + "loss": 0.2318, + "step": 22271, + "teacher_loss": 0.1911410391330719 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.2670944929122925, + "learning_rate": 4.729343277598825e-06, + "loss": 0.1829, + "step": 22272, + "teacher_loss": 0.17360037565231323 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.5493311882019043, + "learning_rate": 4.727688073992344e-06, + "loss": 0.2722, + "step": 22273, + "teacher_loss": 0.2413557469844818 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.23454922437667847, + "learning_rate": 4.7260331058991715e-06, + "loss": 0.1652, + "step": 22274, + "teacher_loss": 0.15748630464076996 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.3663978576660156, + "learning_rate": 4.724378373357257e-06, + "loss": 0.2219, + "step": 22275, + "teacher_loss": 0.20589905977249146 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.3661792576313019, + "learning_rate": 4.722723876404544e-06, + "loss": 0.1709, + "step": 22276, + "teacher_loss": 0.14922773838043213 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.23750965297222137, + "learning_rate": 4.7210696150789545e-06, + "loss": 0.1558, + "step": 22277, + "teacher_loss": 0.14669647812843323 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.5710964798927307, + "learning_rate": 4.7194155894184205e-06, + "loss": 0.2208, + "step": 22278, + "teacher_loss": 0.18187859654426575 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.5654985308647156, + "learning_rate": 4.71776179946087e-06, + "loss": 0.22, + "step": 22279, + "teacher_loss": 0.181605726480484 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.7033708095550537, + "learning_rate": 4.716108245244214e-06, + "loss": 0.2089, + "step": 22280, + "teacher_loss": 0.1539667695760727 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.1005990132689476, + "learning_rate": 4.714454926806356e-06, + "loss": 0.1647, + "step": 22281, + "teacher_loss": 0.17187516391277313 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.2601490020751953, + "learning_rate": 4.712801844185211e-06, + "loss": 0.1487, + "step": 22282, + "teacher_loss": 0.1363557130098343 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.2546847462654114, + "learning_rate": 4.71114899741868e-06, + "loss": 0.1441, + "step": 22283, + "teacher_loss": 0.13180966675281525 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.7071696519851685, + "learning_rate": 4.709496386544653e-06, + "loss": 0.2601, + "step": 22284, + "teacher_loss": 0.2104508876800537 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.24335068464279175, + "learning_rate": 4.707844011601019e-06, + "loss": 0.2214, + "step": 22285, + "teacher_loss": 0.21894752979278564 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.3953036069869995, + "learning_rate": 4.7061918726256695e-06, + "loss": 0.1994, + "step": 22286, + "teacher_loss": 0.17759236693382263 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.3916807174682617, + "learning_rate": 4.704539969656474e-06, + "loss": 0.2451, + "step": 22287, + "teacher_loss": 0.22876980900764465 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.9959553480148315, + "learning_rate": 4.702888302731309e-06, + "loss": 0.2062, + "step": 22288, + "teacher_loss": 0.11843357235193253 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.6659566760063171, + "learning_rate": 4.7012368718880476e-06, + "loss": 0.2842, + "step": 22289, + "teacher_loss": 0.24179702997207642 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.4169069826602936, + "learning_rate": 4.699585677164543e-06, + "loss": 0.2456, + "step": 22290, + "teacher_loss": 0.2265946865081787 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.5631296038627625, + "learning_rate": 4.697934718598656e-06, + "loss": 0.2342, + "step": 22291, + "teacher_loss": 0.1976226270198822 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.2258257269859314, + "learning_rate": 4.696283996228243e-06, + "loss": 0.1927, + "step": 22292, + "teacher_loss": 0.18901114165782928 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.27129799127578735, + "learning_rate": 4.694633510091149e-06, + "loss": 0.2032, + "step": 22293, + "teacher_loss": 0.19562718272209167 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.34878307580947876, + "learning_rate": 4.692983260225199e-06, + "loss": 0.173, + "step": 22294, + "teacher_loss": 0.1535157561302185 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.813422679901123, + "learning_rate": 4.6913332466682505e-06, + "loss": 0.2603, + "step": 22295, + "teacher_loss": 0.19889700412750244 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.2348555624485016, + "learning_rate": 4.689683469458124e-06, + "loss": 0.1304, + "step": 22296, + "teacher_loss": 0.11883604526519775 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.2653898298740387, + "learning_rate": 4.688033928632639e-06, + "loss": 0.1923, + "step": 22297, + "teacher_loss": 0.1842237412929535 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.4472760260105133, + "learning_rate": 4.6863846242296195e-06, + "loss": 0.2418, + "step": 22298, + "teacher_loss": 0.2189258486032486 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.5030593872070312, + "learning_rate": 4.684735556286883e-06, + "loss": 0.2077, + "step": 22299, + "teacher_loss": 0.17487642168998718 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.5020591020584106, + "learning_rate": 4.683086724842228e-06, + "loss": 0.2611, + "step": 22300, + "teacher_loss": 0.23430484533309937 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.23359909653663635, + "learning_rate": 4.681438129933464e-06, + "loss": 0.1791, + "step": 22301, + "teacher_loss": 0.17300131916999817 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.1912221610546112, + "learning_rate": 4.679789771598392e-06, + "loss": 0.1693, + "step": 22302, + "teacher_loss": 0.16685792803764343 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.5604729652404785, + "learning_rate": 4.678141649874798e-06, + "loss": 0.2874, + "step": 22303, + "teacher_loss": 0.2570732533931732 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.47085297107696533, + "learning_rate": 4.6764937648004595e-06, + "loss": 0.3578, + "step": 22304, + "teacher_loss": 0.34526288509368896 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.39772528409957886, + "learning_rate": 4.674846116413178e-06, + "loss": 0.1949, + "step": 22305, + "teacher_loss": 0.17238157987594604 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.5819420218467712, + "learning_rate": 4.6731987047507185e-06, + "loss": 0.3031, + "step": 22306, + "teacher_loss": 0.2721473276615143 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.9132729768753052, + "learning_rate": 4.671551529850841e-06, + "loss": 0.2894, + "step": 22307, + "teacher_loss": 0.22010083496570587 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.6451421976089478, + "learning_rate": 4.669904591751332e-06, + "loss": 0.2489, + "step": 22308, + "teacher_loss": 0.20490939915180206 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.43649476766586304, + "learning_rate": 4.668257890489937e-06, + "loss": 0.2071, + "step": 22309, + "teacher_loss": 0.18164560198783875 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.6438288688659668, + "learning_rate": 4.666611426104409e-06, + "loss": 0.2401, + "step": 22310, + "teacher_loss": 0.19519805908203125 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.299312949180603, + "learning_rate": 4.6649651986325e-06, + "loss": 0.1781, + "step": 22311, + "teacher_loss": 0.1645796149969101 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.6446738243103027, + "learning_rate": 4.6633192081119575e-06, + "loss": 0.3151, + "step": 22312, + "teacher_loss": 0.2784823775291443 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.27618521451950073, + "learning_rate": 4.66167345458051e-06, + "loss": 0.2094, + "step": 22313, + "teacher_loss": 0.20192891359329224 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.5535421967506409, + "learning_rate": 4.660027938075894e-06, + "loss": 0.257, + "step": 22314, + "teacher_loss": 0.22406136989593506 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.5050574541091919, + "learning_rate": 4.658382658635841e-06, + "loss": 0.2413, + "step": 22315, + "teacher_loss": 0.2119983732700348 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.5441836714744568, + "learning_rate": 4.656737616298065e-06, + "loss": 0.2736, + "step": 22316, + "teacher_loss": 0.24349018931388855 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.35144421458244324, + "learning_rate": 4.655092811100282e-06, + "loss": 0.149, + "step": 22317, + "teacher_loss": 0.12652841210365295 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.29485535621643066, + "learning_rate": 4.653448243080212e-06, + "loss": 0.2767, + "step": 22318, + "teacher_loss": 0.27469971776008606 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.3731991648674011, + "learning_rate": 4.651803912275548e-06, + "loss": 0.2161, + "step": 22319, + "teacher_loss": 0.19862470030784607 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.26870012283325195, + "learning_rate": 4.650159818723999e-06, + "loss": 0.1644, + "step": 22320, + "teacher_loss": 0.15275856852531433 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.48765650391578674, + "learning_rate": 4.6485159624632505e-06, + "loss": 0.1925, + "step": 22321, + "teacher_loss": 0.15975692868232727 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.4498612582683563, + "learning_rate": 4.646872343530999e-06, + "loss": 0.2502, + "step": 22322, + "teacher_loss": 0.22801800072193146 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.4751649498939514, + "learning_rate": 4.6452289619649225e-06, + "loss": 0.2234, + "step": 22323, + "teacher_loss": 0.1954212635755539 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.25882992148399353, + "learning_rate": 4.643585817802697e-06, + "loss": 0.1785, + "step": 22324, + "teacher_loss": 0.16959968209266663 + }, + { + "compression_loss": 0.0, + "epoch": 4.03, + "label_loss": 0.33566170930862427, + "learning_rate": 4.641942911082007e-06, + "loss": 0.2097, + "step": 22325, + "teacher_loss": 0.1956641972064972 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.2695721387863159, + "learning_rate": 4.640300241840505e-06, + "loss": 0.1841, + "step": 22326, + "teacher_loss": 0.17460983991622925 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.7575740218162537, + "learning_rate": 4.638657810115857e-06, + "loss": 0.2833, + "step": 22327, + "teacher_loss": 0.23062311112880707 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.4344967007637024, + "learning_rate": 4.637015615945727e-06, + "loss": 0.2398, + "step": 22328, + "teacher_loss": 0.2182224988937378 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.21146127581596375, + "learning_rate": 4.635373659367753e-06, + "loss": 0.1756, + "step": 22329, + "teacher_loss": 0.17163875699043274 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.3587035536766052, + "learning_rate": 4.633731940419592e-06, + "loss": 0.1914, + "step": 22330, + "teacher_loss": 0.172776460647583 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.3692079782485962, + "learning_rate": 4.632090459138872e-06, + "loss": 0.211, + "step": 22331, + "teacher_loss": 0.1934436708688736 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.9100313186645508, + "learning_rate": 4.6304492155632355e-06, + "loss": 0.3771, + "step": 22332, + "teacher_loss": 0.3178538382053375 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.47658199071884155, + "learning_rate": 4.628808209730311e-06, + "loss": 0.1917, + "step": 22333, + "teacher_loss": 0.16002070903778076 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.4166563153266907, + "learning_rate": 4.6271674416777164e-06, + "loss": 0.2006, + "step": 22334, + "teacher_loss": 0.17662595212459564 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.4648454785346985, + "learning_rate": 4.625526911443073e-06, + "loss": 0.2122, + "step": 22335, + "teacher_loss": 0.18418218195438385 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.18360412120819092, + "learning_rate": 4.623886619063997e-06, + "loss": 0.1488, + "step": 22336, + "teacher_loss": 0.1448913961648941 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.23698677122592926, + "learning_rate": 4.6222465645780885e-06, + "loss": 0.1554, + "step": 22337, + "teacher_loss": 0.1463409960269928 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.6238726377487183, + "learning_rate": 4.620606748022952e-06, + "loss": 0.3252, + "step": 22338, + "teacher_loss": 0.2919802665710449 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.05942612513899803, + "learning_rate": 4.618967169436187e-06, + "loss": 0.1228, + "step": 22339, + "teacher_loss": 0.12981215119361877 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.4003221392631531, + "learning_rate": 4.617327828855377e-06, + "loss": 0.178, + "step": 22340, + "teacher_loss": 0.15329578518867493 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.2753027081489563, + "learning_rate": 4.615688726318111e-06, + "loss": 0.2223, + "step": 22341, + "teacher_loss": 0.2163984477519989 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.7132440805435181, + "learning_rate": 4.614049861861974e-06, + "loss": 0.3096, + "step": 22342, + "teacher_loss": 0.2647353410720825 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.34624040126800537, + "learning_rate": 4.612411235524533e-06, + "loss": 0.1798, + "step": 22343, + "teacher_loss": 0.1613403856754303 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.41245031356811523, + "learning_rate": 4.610772847343358e-06, + "loss": 0.2347, + "step": 22344, + "teacher_loss": 0.21495160460472107 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.1924239695072174, + "learning_rate": 4.609134697356009e-06, + "loss": 0.1571, + "step": 22345, + "teacher_loss": 0.15320059657096863 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.7938008308410645, + "learning_rate": 4.607496785600054e-06, + "loss": 0.2877, + "step": 22346, + "teacher_loss": 0.23149684071540833 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.8822792768478394, + "learning_rate": 4.605859112113036e-06, + "loss": 0.3379, + "step": 22347, + "teacher_loss": 0.2774544358253479 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.7164634466171265, + "learning_rate": 4.604221676932503e-06, + "loss": 0.2591, + "step": 22348, + "teacher_loss": 0.20822957158088684 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.4802509546279907, + "learning_rate": 4.602584480096005e-06, + "loss": 0.1825, + "step": 22349, + "teacher_loss": 0.14944472908973694 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.7466652989387512, + "learning_rate": 4.600947521641066e-06, + "loss": 0.2406, + "step": 22350, + "teacher_loss": 0.18442079424858093 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.929283618927002, + "learning_rate": 4.5993108016052235e-06, + "loss": 0.3235, + "step": 22351, + "teacher_loss": 0.25622570514678955 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.6867748498916626, + "learning_rate": 4.597674320026006e-06, + "loss": 0.4083, + "step": 22352, + "teacher_loss": 0.37731873989105225 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.4594021439552307, + "learning_rate": 4.5960380769409284e-06, + "loss": 0.2243, + "step": 22353, + "teacher_loss": 0.19815891981124878 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.25068002939224243, + "learning_rate": 4.594402072387497e-06, + "loss": 0.1697, + "step": 22354, + "teacher_loss": 0.16070103645324707 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.43247419595718384, + "learning_rate": 4.592766306403235e-06, + "loss": 0.2135, + "step": 22355, + "teacher_loss": 0.18922486901283264 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.3742496371269226, + "learning_rate": 4.591130779025641e-06, + "loss": 0.1763, + "step": 22356, + "teacher_loss": 0.15431681275367737 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.5696107149124146, + "learning_rate": 4.5894954902922e-06, + "loss": 0.2285, + "step": 22357, + "teacher_loss": 0.19062277674674988 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.20192936062812805, + "learning_rate": 4.5878604402404254e-06, + "loss": 0.2141, + "step": 22358, + "teacher_loss": 0.21547850966453552 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.6407074332237244, + "learning_rate": 4.586225628907794e-06, + "loss": 0.2304, + "step": 22359, + "teacher_loss": 0.18479309976100922 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.4709963798522949, + "learning_rate": 4.584591056331783e-06, + "loss": 0.1995, + "step": 22360, + "teacher_loss": 0.1693221926689148 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.3023056387901306, + "learning_rate": 4.5829567225498696e-06, + "loss": 0.1765, + "step": 22361, + "teacher_loss": 0.16253016889095306 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.4718725085258484, + "learning_rate": 4.581322627599533e-06, + "loss": 0.2145, + "step": 22362, + "teacher_loss": 0.18591247498989105 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.45685333013534546, + "learning_rate": 4.579688771518227e-06, + "loss": 0.2357, + "step": 22363, + "teacher_loss": 0.2111477553844452 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.4295060634613037, + "learning_rate": 4.578055154343414e-06, + "loss": 0.253, + "step": 22364, + "teacher_loss": 0.2333919107913971 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.34789755940437317, + "learning_rate": 4.576421776112556e-06, + "loss": 0.223, + "step": 22365, + "teacher_loss": 0.20916923880577087 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.29522159695625305, + "learning_rate": 4.574788636863097e-06, + "loss": 0.2359, + "step": 22366, + "teacher_loss": 0.22930538654327393 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.23254722356796265, + "learning_rate": 4.573155736632466e-06, + "loss": 0.2029, + "step": 22367, + "teacher_loss": 0.1996101438999176 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.3475077152252197, + "learning_rate": 4.571523075458122e-06, + "loss": 0.1533, + "step": 22368, + "teacher_loss": 0.13172005116939545 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.5157480835914612, + "learning_rate": 4.569890653377488e-06, + "loss": 0.2094, + "step": 22369, + "teacher_loss": 0.17536842823028564 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.17539718747138977, + "learning_rate": 4.568258470427987e-06, + "loss": 0.1548, + "step": 22370, + "teacher_loss": 0.15246671438217163 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.5434921383857727, + "learning_rate": 4.566626526647041e-06, + "loss": 0.2155, + "step": 22371, + "teacher_loss": 0.1790551245212555 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.058663271367549896, + "learning_rate": 4.564994822072075e-06, + "loss": 0.1603, + "step": 22372, + "teacher_loss": 0.1715439110994339 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.28227531909942627, + "learning_rate": 4.563363356740486e-06, + "loss": 0.135, + "step": 22373, + "teacher_loss": 0.11862976849079132 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.4497913420200348, + "learning_rate": 4.561732130689684e-06, + "loss": 0.2169, + "step": 22374, + "teacher_loss": 0.19096790254116058 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.06890691816806793, + "learning_rate": 4.5601011439570754e-06, + "loss": 0.1632, + "step": 22375, + "teacher_loss": 0.17366473376750946 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.41623252630233765, + "learning_rate": 4.558470396580041e-06, + "loss": 0.222, + "step": 22376, + "teacher_loss": 0.20041929185390472 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.32481563091278076, + "learning_rate": 4.556839888595974e-06, + "loss": 0.2185, + "step": 22377, + "teacher_loss": 0.20673654973506927 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.6112340092658997, + "learning_rate": 4.555209620042266e-06, + "loss": 0.2153, + "step": 22378, + "teacher_loss": 0.17132142186164856 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.558569073677063, + "learning_rate": 4.553579590956279e-06, + "loss": 0.2643, + "step": 22379, + "teacher_loss": 0.23162616789340973 + }, + { + "compression_loss": 0.0, + "epoch": 4.04, + "label_loss": 0.7636164426803589, + "learning_rate": 4.5519498013753916e-06, + "loss": 0.2577, + "step": 22380, + "teacher_loss": 0.20148514211177826 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.5127445459365845, + "learning_rate": 4.550320251336976e-06, + "loss": 0.2081, + "step": 22381, + "teacher_loss": 0.1742207109928131 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.30378487706184387, + "learning_rate": 4.548690940878384e-06, + "loss": 0.1539, + "step": 22382, + "teacher_loss": 0.13729184865951538 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.4069824814796448, + "learning_rate": 4.5470618700369755e-06, + "loss": 0.2329, + "step": 22383, + "teacher_loss": 0.2135481834411621 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.6297832727432251, + "learning_rate": 4.545433038850098e-06, + "loss": 0.2829, + "step": 22384, + "teacher_loss": 0.24433034658432007 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.24492287635803223, + "learning_rate": 4.543804447355098e-06, + "loss": 0.1612, + "step": 22385, + "teacher_loss": 0.1518622636795044 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.24344030022621155, + "learning_rate": 4.54217609558931e-06, + "loss": 0.1868, + "step": 22386, + "teacher_loss": 0.18047180771827698 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.4224638342857361, + "learning_rate": 4.5405479835900695e-06, + "loss": 0.1933, + "step": 22387, + "teacher_loss": 0.16779302060604095 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 1.02317476272583, + "learning_rate": 4.5389201113947085e-06, + "loss": 0.2714, + "step": 22388, + "teacher_loss": 0.18782779574394226 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.5103275775909424, + "learning_rate": 4.537292479040542e-06, + "loss": 0.1731, + "step": 22389, + "teacher_loss": 0.1356210708618164 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.40686067938804626, + "learning_rate": 4.535665086564888e-06, + "loss": 0.1667, + "step": 22390, + "teacher_loss": 0.13997262716293335 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.4288334846496582, + "learning_rate": 4.534037934005066e-06, + "loss": 0.1891, + "step": 22391, + "teacher_loss": 0.16248804330825806 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.4488275945186615, + "learning_rate": 4.532411021398371e-06, + "loss": 0.2532, + "step": 22392, + "teacher_loss": 0.23149588704109192 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.3356611430644989, + "learning_rate": 4.530784348782112e-06, + "loss": 0.2862, + "step": 22393, + "teacher_loss": 0.2807316780090332 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.38262325525283813, + "learning_rate": 4.529157916193576e-06, + "loss": 0.2096, + "step": 22394, + "teacher_loss": 0.19040557742118835 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.2759501039981842, + "learning_rate": 4.527531723670055e-06, + "loss": 0.1605, + "step": 22395, + "teacher_loss": 0.1477038562297821 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.9571385383605957, + "learning_rate": 4.525905771248838e-06, + "loss": 0.3184, + "step": 22396, + "teacher_loss": 0.24738846719264984 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.5152538418769836, + "learning_rate": 4.524280058967193e-06, + "loss": 0.1901, + "step": 22397, + "teacher_loss": 0.15394222736358643 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.18781855702400208, + "learning_rate": 4.522654586862401e-06, + "loss": 0.2048, + "step": 22398, + "teacher_loss": 0.2067258656024933 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.3841584324836731, + "learning_rate": 4.5210293549717306e-06, + "loss": 0.2217, + "step": 22399, + "teacher_loss": 0.20366591215133667 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.48990312218666077, + "learning_rate": 4.5194043633324335e-06, + "loss": 0.3433, + "step": 22400, + "teacher_loss": 0.3269987106323242 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.24159112572669983, + "learning_rate": 4.517779611981772e-06, + "loss": 0.1774, + "step": 22401, + "teacher_loss": 0.1702936887741089 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.5890333652496338, + "learning_rate": 4.516155100957002e-06, + "loss": 0.2186, + "step": 22402, + "teacher_loss": 0.17747971415519714 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.21373350918293, + "learning_rate": 4.514530830295365e-06, + "loss": 0.1832, + "step": 22403, + "teacher_loss": 0.17978642880916595 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.44638463854789734, + "learning_rate": 4.512906800034086e-06, + "loss": 0.2494, + "step": 22404, + "teacher_loss": 0.22746042907238007 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.6371122598648071, + "learning_rate": 4.511283010210423e-06, + "loss": 0.2331, + "step": 22405, + "teacher_loss": 0.18825319409370422 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.4463879466056824, + "learning_rate": 4.509659460861595e-06, + "loss": 0.2554, + "step": 22406, + "teacher_loss": 0.23412543535232544 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.7087075710296631, + "learning_rate": 4.508036152024819e-06, + "loss": 0.256, + "step": 22407, + "teacher_loss": 0.20572713017463684 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.5115741491317749, + "learning_rate": 4.506413083737317e-06, + "loss": 0.2166, + "step": 22408, + "teacher_loss": 0.1837860345840454 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.38696935772895813, + "learning_rate": 4.5047902560363045e-06, + "loss": 0.2245, + "step": 22409, + "teacher_loss": 0.20639248192310333 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.31366080045700073, + "learning_rate": 4.503167668958982e-06, + "loss": 0.1717, + "step": 22410, + "teacher_loss": 0.1559767872095108 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.24255573749542236, + "learning_rate": 4.501545322542555e-06, + "loss": 0.1482, + "step": 22411, + "teacher_loss": 0.137699156999588 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.4071047306060791, + "learning_rate": 4.49992321682422e-06, + "loss": 0.2131, + "step": 22412, + "teacher_loss": 0.19159506261348724 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.46025973558425903, + "learning_rate": 4.498301351841162e-06, + "loss": 0.2592, + "step": 22413, + "teacher_loss": 0.2368142306804657 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.18871013820171356, + "learning_rate": 4.496679727630568e-06, + "loss": 0.1944, + "step": 22414, + "teacher_loss": 0.19505611062049866 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.553680419921875, + "learning_rate": 4.4950583442296205e-06, + "loss": 0.2328, + "step": 22415, + "teacher_loss": 0.19713759422302246 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.39101850986480713, + "learning_rate": 4.493437201675491e-06, + "loss": 0.2475, + "step": 22416, + "teacher_loss": 0.23158606886863708 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.4335443675518036, + "learning_rate": 4.491816300005336e-06, + "loss": 0.2062, + "step": 22417, + "teacher_loss": 0.18089798092842102 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.5334718823432922, + "learning_rate": 4.490195639256339e-06, + "loss": 0.1998, + "step": 22418, + "teacher_loss": 0.1627301722764969 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.3568463623523712, + "learning_rate": 4.488575219465645e-06, + "loss": 0.1916, + "step": 22419, + "teacher_loss": 0.17322498559951782 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.8238714933395386, + "learning_rate": 4.486955040670401e-06, + "loss": 0.2953, + "step": 22420, + "teacher_loss": 0.23651883006095886 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.18777135014533997, + "learning_rate": 4.485335102907758e-06, + "loss": 0.1873, + "step": 22421, + "teacher_loss": 0.18730252981185913 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.20914584398269653, + "learning_rate": 4.483715406214863e-06, + "loss": 0.2025, + "step": 22422, + "teacher_loss": 0.20180365443229675 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.4354602098464966, + "learning_rate": 4.482095950628839e-06, + "loss": 0.2431, + "step": 22423, + "teacher_loss": 0.22169464826583862 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.35351261496543884, + "learning_rate": 4.480476736186819e-06, + "loss": 0.3197, + "step": 22424, + "teacher_loss": 0.31592828035354614 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.34121522307395935, + "learning_rate": 4.478857762925934e-06, + "loss": 0.1656, + "step": 22425, + "teacher_loss": 0.14605233073234558 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.303475558757782, + "learning_rate": 4.4772390308832966e-06, + "loss": 0.1665, + "step": 22426, + "teacher_loss": 0.15126320719718933 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.5880388021469116, + "learning_rate": 4.47562054009601e-06, + "loss": 0.2808, + "step": 22427, + "teacher_loss": 0.2466752678155899 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.21170049905776978, + "learning_rate": 4.474002290601201e-06, + "loss": 0.1456, + "step": 22428, + "teacher_loss": 0.13823555409908295 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.6346510648727417, + "learning_rate": 4.4723842824359595e-06, + "loss": 0.3503, + "step": 22429, + "teacher_loss": 0.31865566968917847 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.2442774623632431, + "learning_rate": 4.470766515637373e-06, + "loss": 0.1453, + "step": 22430, + "teacher_loss": 0.13434451818466187 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.40366122126579285, + "learning_rate": 4.469148990242552e-06, + "loss": 0.2205, + "step": 22431, + "teacher_loss": 0.20015056431293488 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.42039209604263306, + "learning_rate": 4.467531706288573e-06, + "loss": 0.2402, + "step": 22432, + "teacher_loss": 0.22019259631633759 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.7823781967163086, + "learning_rate": 4.465914663812508e-06, + "loss": 0.2699, + "step": 22433, + "teacher_loss": 0.212961345911026 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.5453091859817505, + "learning_rate": 4.4642978628514365e-06, + "loss": 0.2198, + "step": 22434, + "teacher_loss": 0.18360097706317902 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.4284927248954773, + "learning_rate": 4.4626813034424325e-06, + "loss": 0.2054, + "step": 22435, + "teacher_loss": 0.18064826726913452 + }, + { + "compression_loss": 0.0, + "epoch": 4.05, + "label_loss": 0.451980859041214, + "learning_rate": 4.46106498562255e-06, + "loss": 0.1932, + "step": 22436, + "teacher_loss": 0.1644885241985321 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.22469471395015717, + "learning_rate": 4.459448909428848e-06, + "loss": 0.2062, + "step": 22437, + "teacher_loss": 0.20416519045829773 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.13847118616104126, + "learning_rate": 4.457833074898386e-06, + "loss": 0.1461, + "step": 22438, + "teacher_loss": 0.14699944853782654 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.3379843235015869, + "learning_rate": 4.4562174820682e-06, + "loss": 0.1598, + "step": 22439, + "teacher_loss": 0.1399690955877304 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.5982440710067749, + "learning_rate": 4.454602130975336e-06, + "loss": 0.2262, + "step": 22440, + "teacher_loss": 0.18484535813331604 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.5035433769226074, + "learning_rate": 4.452987021656832e-06, + "loss": 0.1938, + "step": 22441, + "teacher_loss": 0.15940451622009277 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.25582355260849, + "learning_rate": 4.4513721541497095e-06, + "loss": 0.1827, + "step": 22442, + "teacher_loss": 0.17462529242038727 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.26584821939468384, + "learning_rate": 4.4497575284910015e-06, + "loss": 0.1533, + "step": 22443, + "teacher_loss": 0.1407933384180069 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.7291821241378784, + "learning_rate": 4.448143144717719e-06, + "loss": 0.3652, + "step": 22444, + "teacher_loss": 0.3247072100639343 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.35927897691726685, + "learning_rate": 4.446529002866877e-06, + "loss": 0.2322, + "step": 22445, + "teacher_loss": 0.21805424988269806 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.7203177809715271, + "learning_rate": 4.444915102975488e-06, + "loss": 0.245, + "step": 22446, + "teacher_loss": 0.19219619035720825 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.5104303359985352, + "learning_rate": 4.4433014450805456e-06, + "loss": 0.2419, + "step": 22447, + "teacher_loss": 0.21200843155384064 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.3425580859184265, + "learning_rate": 4.441688029219055e-06, + "loss": 0.1919, + "step": 22448, + "teacher_loss": 0.1751413643360138 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.8826892971992493, + "learning_rate": 4.440074855427998e-06, + "loss": 0.3009, + "step": 22449, + "teacher_loss": 0.23625552654266357 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.20078477263450623, + "learning_rate": 4.438461923744364e-06, + "loss": 0.1413, + "step": 22450, + "teacher_loss": 0.13474193215370178 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.34865784645080566, + "learning_rate": 4.43684923420514e-06, + "loss": 0.1958, + "step": 22451, + "teacher_loss": 0.17877960205078125 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.33222702145576477, + "learning_rate": 4.435236786847284e-06, + "loss": 0.2061, + "step": 22452, + "teacher_loss": 0.19213572144508362 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.5211025476455688, + "learning_rate": 4.433624581707781e-06, + "loss": 0.2224, + "step": 22453, + "teacher_loss": 0.1892303079366684 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.8634397983551025, + "learning_rate": 4.432012618823583e-06, + "loss": 0.2921, + "step": 22454, + "teacher_loss": 0.22860601544380188 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.44478845596313477, + "learning_rate": 4.430400898231649e-06, + "loss": 0.2615, + "step": 22455, + "teacher_loss": 0.24108169972896576 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.35409748554229736, + "learning_rate": 4.428789419968939e-06, + "loss": 0.2423, + "step": 22456, + "teacher_loss": 0.2298848032951355 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.9793566465377808, + "learning_rate": 4.427178184072389e-06, + "loss": 0.2756, + "step": 22457, + "teacher_loss": 0.197415292263031 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.3693322539329529, + "learning_rate": 4.425567190578943e-06, + "loss": 0.2036, + "step": 22458, + "teacher_loss": 0.18515688180923462 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.7170186042785645, + "learning_rate": 4.423956439525544e-06, + "loss": 0.327, + "step": 22459, + "teacher_loss": 0.2836138606071472 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.4539327621459961, + "learning_rate": 4.422345930949108e-06, + "loss": 0.2453, + "step": 22460, + "teacher_loss": 0.22208912670612335 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.8809143900871277, + "learning_rate": 4.420735664886568e-06, + "loss": 0.3518, + "step": 22461, + "teacher_loss": 0.29303354024887085 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.43342316150665283, + "learning_rate": 4.419125641374845e-06, + "loss": 0.2345, + "step": 22462, + "teacher_loss": 0.2124405801296234 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.48221659660339355, + "learning_rate": 4.417515860450844e-06, + "loss": 0.2727, + "step": 22463, + "teacher_loss": 0.24941229820251465 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.35635969042778015, + "learning_rate": 4.415906322151477e-06, + "loss": 0.1794, + "step": 22464, + "teacher_loss": 0.15978941321372986 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.20930947363376617, + "learning_rate": 4.414297026513649e-06, + "loss": 0.2111, + "step": 22465, + "teacher_loss": 0.2113484889268875 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.4871370196342468, + "learning_rate": 4.412687973574253e-06, + "loss": 0.2214, + "step": 22466, + "teacher_loss": 0.191874697804451 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.29095304012298584, + "learning_rate": 4.411079163370169e-06, + "loss": 0.1658, + "step": 22467, + "teacher_loss": 0.15185675024986267 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.576290488243103, + "learning_rate": 4.409470595938303e-06, + "loss": 0.6552, + "step": 22468, + "teacher_loss": 0.6639301776885986 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.26170191168785095, + "learning_rate": 4.407862271315524e-06, + "loss": 0.1866, + "step": 22469, + "teacher_loss": 0.17822669446468353 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.41512882709503174, + "learning_rate": 4.4062541895387005e-06, + "loss": 0.1957, + "step": 22470, + "teacher_loss": 0.17126977443695068 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.36659109592437744, + "learning_rate": 4.404646350644708e-06, + "loss": 0.1714, + "step": 22471, + "teacher_loss": 0.1497318148612976 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.3778338134288788, + "learning_rate": 4.403038754670413e-06, + "loss": 0.2214, + "step": 22472, + "teacher_loss": 0.20400582253932953 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.5372523069381714, + "learning_rate": 4.401431401652662e-06, + "loss": 0.2176, + "step": 22473, + "teacher_loss": 0.18211859464645386 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.5265488624572754, + "learning_rate": 4.3998242916283146e-06, + "loss": 0.2109, + "step": 22474, + "teacher_loss": 0.1757970154285431 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.11866244673728943, + "learning_rate": 4.398217424634219e-06, + "loss": 0.1437, + "step": 22475, + "teacher_loss": 0.14645174145698547 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.4486837387084961, + "learning_rate": 4.396610800707211e-06, + "loss": 0.1912, + "step": 22476, + "teacher_loss": 0.1626073569059372 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.5628445744514465, + "learning_rate": 4.395004419884118e-06, + "loss": 0.2659, + "step": 22477, + "teacher_loss": 0.23292362689971924 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.2755957543849945, + "learning_rate": 4.393398282201788e-06, + "loss": 0.1653, + "step": 22478, + "teacher_loss": 0.15302352607250214 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.7415618300437927, + "learning_rate": 4.391792387697035e-06, + "loss": 0.2657, + "step": 22479, + "teacher_loss": 0.2128206491470337 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.19963206350803375, + "learning_rate": 4.390186736406669e-06, + "loss": 0.1791, + "step": 22480, + "teacher_loss": 0.17687368392944336 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.3069932460784912, + "learning_rate": 4.388581328367519e-06, + "loss": 0.2306, + "step": 22481, + "teacher_loss": 0.22214823961257935 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.8350578546524048, + "learning_rate": 4.386976163616385e-06, + "loss": 0.2873, + "step": 22482, + "teacher_loss": 0.2264130711555481 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.15924188494682312, + "learning_rate": 4.385371242190064e-06, + "loss": 0.1554, + "step": 22483, + "teacher_loss": 0.1549575924873352 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.40631574392318726, + "learning_rate": 4.383766564125355e-06, + "loss": 0.2564, + "step": 22484, + "teacher_loss": 0.23979106545448303 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.40063318610191345, + "learning_rate": 4.382162129459055e-06, + "loss": 0.2245, + "step": 22485, + "teacher_loss": 0.20490238070487976 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.4342784285545349, + "learning_rate": 4.38055793822794e-06, + "loss": 0.2351, + "step": 22486, + "teacher_loss": 0.21298450231552124 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.1803794503211975, + "learning_rate": 4.3789539904687905e-06, + "loss": 0.1539, + "step": 22487, + "teacher_loss": 0.1509999781847 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 1.0708751678466797, + "learning_rate": 4.377350286218389e-06, + "loss": 0.2389, + "step": 22488, + "teacher_loss": 0.14647985994815826 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.3389892280101776, + "learning_rate": 4.375746825513496e-06, + "loss": 0.2117, + "step": 22489, + "teacher_loss": 0.19751468300819397 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.5767058730125427, + "learning_rate": 4.374143608390865e-06, + "loss": 0.2403, + "step": 22490, + "teacher_loss": 0.20292508602142334 + }, + { + "compression_loss": 0.0, + "epoch": 4.06, + "label_loss": 0.3074719309806824, + "learning_rate": 4.3725406348872745e-06, + "loss": 0.175, + "step": 22491, + "teacher_loss": 0.16029608249664307 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.37424999475479126, + "learning_rate": 4.370937905039463e-06, + "loss": 0.2025, + "step": 22492, + "teacher_loss": 0.18341854214668274 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.28178104758262634, + "learning_rate": 4.369335418884173e-06, + "loss": 0.1815, + "step": 22493, + "teacher_loss": 0.17039045691490173 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.7284677624702454, + "learning_rate": 4.3677331764581506e-06, + "loss": 0.3103, + "step": 22494, + "teacher_loss": 0.2638307511806488 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.5923912525177002, + "learning_rate": 4.366131177798132e-06, + "loss": 0.2393, + "step": 22495, + "teacher_loss": 0.20008601248264313 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.3565843403339386, + "learning_rate": 4.3645294229408415e-06, + "loss": 0.2369, + "step": 22496, + "teacher_loss": 0.2236430048942566 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.4684481918811798, + "learning_rate": 4.362927911923003e-06, + "loss": 0.204, + "step": 22497, + "teacher_loss": 0.17464913427829742 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.36763694882392883, + "learning_rate": 4.361326644781341e-06, + "loss": 0.1921, + "step": 22498, + "teacher_loss": 0.17262691259384155 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.1315077543258667, + "learning_rate": 4.359725621552557e-06, + "loss": 0.1695, + "step": 22499, + "teacher_loss": 0.17368349432945251 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.4267739951610565, + "learning_rate": 4.358124842273364e-06, + "loss": 0.2088, + "step": 22500, + "teacher_loss": 0.1845782995223999 + }, + { + "epoch": 4.07, + "eval_exact_match": 80.17029328287606, + "eval_f1": 87.60082876314915, + "step": 22500 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.42162588238716125, + "learning_rate": 4.3565243069804665e-06, + "loss": 0.1791, + "step": 22501, + "teacher_loss": 0.15219640731811523 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.7650765180587769, + "learning_rate": 4.354924015710553e-06, + "loss": 0.3852, + "step": 22502, + "teacher_loss": 0.3429529070854187 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.6458345651626587, + "learning_rate": 4.353323968500314e-06, + "loss": 0.2388, + "step": 22503, + "teacher_loss": 0.19356638193130493 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.31992587447166443, + "learning_rate": 4.351724165386442e-06, + "loss": 0.1969, + "step": 22504, + "teacher_loss": 0.18323460221290588 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.4559538662433624, + "learning_rate": 4.350124606405604e-06, + "loss": 0.1766, + "step": 22505, + "teacher_loss": 0.14556415379047394 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.20623818039894104, + "learning_rate": 4.3485252915944845e-06, + "loss": 0.2054, + "step": 22506, + "teacher_loss": 0.20532625913619995 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.19963952898979187, + "learning_rate": 4.34692622098974e-06, + "loss": 0.1554, + "step": 22507, + "teacher_loss": 0.15051355957984924 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.24256964027881622, + "learning_rate": 4.3453273946280374e-06, + "loss": 0.1465, + "step": 22508, + "teacher_loss": 0.13582547008991241 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.20550374686717987, + "learning_rate": 4.3437288125460384e-06, + "loss": 0.1488, + "step": 22509, + "teacher_loss": 0.14255420863628387 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.4193999469280243, + "learning_rate": 4.342130474780385e-06, + "loss": 0.2008, + "step": 22510, + "teacher_loss": 0.1764683723449707 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.3219105005264282, + "learning_rate": 4.340532381367729e-06, + "loss": 0.2, + "step": 22511, + "teacher_loss": 0.18639929592609406 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.3036709427833557, + "learning_rate": 4.338934532344701e-06, + "loss": 0.2203, + "step": 22512, + "teacher_loss": 0.2110208123922348 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.33838146924972534, + "learning_rate": 4.337336927747943e-06, + "loss": 0.209, + "step": 22513, + "teacher_loss": 0.19458970427513123 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.4568299949169159, + "learning_rate": 4.335739567614085e-06, + "loss": 0.1771, + "step": 22514, + "teacher_loss": 0.14599284529685974 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.567828893661499, + "learning_rate": 4.3341424519797415e-06, + "loss": 0.222, + "step": 22515, + "teacher_loss": 0.18355447053909302 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.4044817090034485, + "learning_rate": 4.332545580881537e-06, + "loss": 0.2063, + "step": 22516, + "teacher_loss": 0.18430379033088684 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.7734218239784241, + "learning_rate": 4.330948954356076e-06, + "loss": 0.274, + "step": 22517, + "teacher_loss": 0.21854552626609802 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.23295968770980835, + "learning_rate": 4.329352572439968e-06, + "loss": 0.1793, + "step": 22518, + "teacher_loss": 0.17337566614151 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.29805055260658264, + "learning_rate": 4.3277564351698165e-06, + "loss": 0.1934, + "step": 22519, + "teacher_loss": 0.18172572553157806 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.3570689558982849, + "learning_rate": 4.32616054258221e-06, + "loss": 0.237, + "step": 22520, + "teacher_loss": 0.22367411851882935 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.29694199562072754, + "learning_rate": 4.32456489471374e-06, + "loss": 0.2379, + "step": 22521, + "teacher_loss": 0.2313789576292038 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.6117671728134155, + "learning_rate": 4.322969491600993e-06, + "loss": 0.2182, + "step": 22522, + "teacher_loss": 0.17443028092384338 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.2922821640968323, + "learning_rate": 4.321374333280541e-06, + "loss": 0.2156, + "step": 22523, + "teacher_loss": 0.20710714161396027 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.11824120581150055, + "learning_rate": 4.319779419788961e-06, + "loss": 0.1526, + "step": 22524, + "teacher_loss": 0.15641391277313232 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.24226316809654236, + "learning_rate": 4.318184751162821e-06, + "loss": 0.1934, + "step": 22525, + "teacher_loss": 0.187983438372612 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.7134770750999451, + "learning_rate": 4.316590327438678e-06, + "loss": 0.2504, + "step": 22526, + "teacher_loss": 0.19889569282531738 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.4353092312812805, + "learning_rate": 4.3149961486530795e-06, + "loss": 0.2275, + "step": 22527, + "teacher_loss": 0.2044464498758316 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.9352982044219971, + "learning_rate": 4.313402214842595e-06, + "loss": 0.4688, + "step": 22528, + "teacher_loss": 0.4169725775718689 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.2527202367782593, + "learning_rate": 4.311808526043756e-06, + "loss": 0.1811, + "step": 22529, + "teacher_loss": 0.1731759011745453 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.4207102656364441, + "learning_rate": 4.310215082293094e-06, + "loss": 0.2288, + "step": 22530, + "teacher_loss": 0.20748502016067505 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.3170747756958008, + "learning_rate": 4.30862188362716e-06, + "loss": 0.1734, + "step": 22531, + "teacher_loss": 0.15739217400550842 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.17567959427833557, + "learning_rate": 4.30702893008247e-06, + "loss": 0.1681, + "step": 22532, + "teacher_loss": 0.1672121286392212 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.4852805733680725, + "learning_rate": 4.305436221695545e-06, + "loss": 0.253, + "step": 22533, + "teacher_loss": 0.2271641194820404 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.2636764347553253, + "learning_rate": 4.303843758502902e-06, + "loss": 0.218, + "step": 22534, + "teacher_loss": 0.2128811478614807 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.5491704940795898, + "learning_rate": 4.302251540541059e-06, + "loss": 0.3003, + "step": 22535, + "teacher_loss": 0.27269434928894043 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.13952423632144928, + "learning_rate": 4.300659567846509e-06, + "loss": 0.1315, + "step": 22536, + "teacher_loss": 0.13058537244796753 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.2675098478794098, + "learning_rate": 4.299067840455756e-06, + "loss": 0.2284, + "step": 22537, + "teacher_loss": 0.22410380840301514 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.231197789311409, + "learning_rate": 4.297476358405301e-06, + "loss": 0.2877, + "step": 22538, + "teacher_loss": 0.293992817401886 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.8145919442176819, + "learning_rate": 4.295885121731623e-06, + "loss": 0.2463, + "step": 22539, + "teacher_loss": 0.1831541210412979 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.40225785970687866, + "learning_rate": 4.294294130471199e-06, + "loss": 0.1761, + "step": 22540, + "teacher_loss": 0.1509377658367157 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.2878412902355194, + "learning_rate": 4.292703384660522e-06, + "loss": 0.1519, + "step": 22541, + "teacher_loss": 0.1368279755115509 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.6010397672653198, + "learning_rate": 4.291112884336054e-06, + "loss": 0.1907, + "step": 22542, + "teacher_loss": 0.14513175189495087 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.35069769620895386, + "learning_rate": 4.289522629534257e-06, + "loss": 0.1866, + "step": 22543, + "teacher_loss": 0.1684103012084961 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.19859886169433594, + "learning_rate": 4.287932620291593e-06, + "loss": 0.2011, + "step": 22544, + "teacher_loss": 0.20140162110328674 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.29303768277168274, + "learning_rate": 4.286342856644523e-06, + "loss": 0.1973, + "step": 22545, + "teacher_loss": 0.18661954998970032 + }, + { + "compression_loss": 0.0, + "epoch": 4.07, + "label_loss": 0.27912259101867676, + "learning_rate": 4.284753338629486e-06, + "loss": 0.2412, + "step": 22546, + "teacher_loss": 0.2370205670595169 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.5424875617027283, + "learning_rate": 4.2831640662829274e-06, + "loss": 0.2808, + "step": 22547, + "teacher_loss": 0.2516745626926422 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.6425031423568726, + "learning_rate": 4.2815750396412924e-06, + "loss": 0.2759, + "step": 22548, + "teacher_loss": 0.2351227104663849 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.30593228340148926, + "learning_rate": 4.279986258741001e-06, + "loss": 0.2149, + "step": 22549, + "teacher_loss": 0.20480579137802124 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.2967720329761505, + "learning_rate": 4.278397723618485e-06, + "loss": 0.1858, + "step": 22550, + "teacher_loss": 0.17342662811279297 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.398216187953949, + "learning_rate": 4.276809434310167e-06, + "loss": 0.2753, + "step": 22551, + "teacher_loss": 0.2615974545478821 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.7743715047836304, + "learning_rate": 4.27522139085246e-06, + "loss": 0.3086, + "step": 22552, + "teacher_loss": 0.2568100690841675 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.30458182096481323, + "learning_rate": 4.273633593281762e-06, + "loss": 0.1726, + "step": 22553, + "teacher_loss": 0.15796412527561188 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.4019111394882202, + "learning_rate": 4.2720460416344975e-06, + "loss": 0.226, + "step": 22554, + "teacher_loss": 0.206502765417099 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.41173383593559265, + "learning_rate": 4.270458735947051e-06, + "loss": 0.2387, + "step": 22555, + "teacher_loss": 0.21952712535858154 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.6742020845413208, + "learning_rate": 4.268871676255814e-06, + "loss": 0.222, + "step": 22556, + "teacher_loss": 0.1717754304409027 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.49618640542030334, + "learning_rate": 4.267284862597174e-06, + "loss": 0.1875, + "step": 22557, + "teacher_loss": 0.15319719910621643 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.33164310455322266, + "learning_rate": 4.265698295007521e-06, + "loss": 0.2145, + "step": 22558, + "teacher_loss": 0.20148909091949463 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.6611484885215759, + "learning_rate": 4.264111973523215e-06, + "loss": 0.222, + "step": 22559, + "teacher_loss": 0.17321157455444336 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.36705875396728516, + "learning_rate": 4.262525898180636e-06, + "loss": 0.1663, + "step": 22560, + "teacher_loss": 0.1439988613128662 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.343810111284256, + "learning_rate": 4.26094006901615e-06, + "loss": 0.1949, + "step": 22561, + "teacher_loss": 0.17830270528793335 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.6300286650657654, + "learning_rate": 4.259354486066105e-06, + "loss": 0.2043, + "step": 22562, + "teacher_loss": 0.15699800848960876 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.3817034959793091, + "learning_rate": 4.257769149366862e-06, + "loss": 0.3076, + "step": 22563, + "teacher_loss": 0.299325168132782 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.5715736150741577, + "learning_rate": 4.256184058954767e-06, + "loss": 0.2514, + "step": 22564, + "teacher_loss": 0.2158547192811966 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.547944188117981, + "learning_rate": 4.254599214866157e-06, + "loss": 0.2818, + "step": 22565, + "teacher_loss": 0.2522379755973816 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.30402642488479614, + "learning_rate": 4.253014617137375e-06, + "loss": 0.1873, + "step": 22566, + "teacher_loss": 0.17429357767105103 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.2781296372413635, + "learning_rate": 4.251430265804742e-06, + "loss": 0.2112, + "step": 22567, + "teacher_loss": 0.20381200313568115 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.3918992877006531, + "learning_rate": 4.249846160904587e-06, + "loss": 0.1953, + "step": 22568, + "teacher_loss": 0.17350533604621887 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.27357858419418335, + "learning_rate": 4.248262302473233e-06, + "loss": 0.1626, + "step": 22569, + "teacher_loss": 0.15030162036418915 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.45551547408103943, + "learning_rate": 4.246678690546986e-06, + "loss": 0.2243, + "step": 22570, + "teacher_loss": 0.19865107536315918 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.2869514226913452, + "learning_rate": 4.245095325162154e-06, + "loss": 0.1556, + "step": 22571, + "teacher_loss": 0.14104031026363373 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.4500284790992737, + "learning_rate": 4.243512206355049e-06, + "loss": 0.2183, + "step": 22572, + "teacher_loss": 0.1925460398197174 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.382717490196228, + "learning_rate": 4.2419293341619534e-06, + "loss": 0.2563, + "step": 22573, + "teacher_loss": 0.24221587181091309 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.5417371988296509, + "learning_rate": 4.240346708619167e-06, + "loss": 0.21, + "step": 22574, + "teacher_loss": 0.17318981885910034 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.162979394197464, + "learning_rate": 4.238764329762969e-06, + "loss": 0.1776, + "step": 22575, + "teacher_loss": 0.17923462390899658 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 1.5670448541641235, + "learning_rate": 4.237182197629645e-06, + "loss": 0.3629, + "step": 22576, + "teacher_loss": 0.22915859520435333 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.5816590785980225, + "learning_rate": 4.23560031225546e-06, + "loss": 0.2635, + "step": 22577, + "teacher_loss": 0.22810280323028564 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.6411575078964233, + "learning_rate": 4.234018673676687e-06, + "loss": 0.2548, + "step": 22578, + "teacher_loss": 0.2118661105632782 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.38639289140701294, + "learning_rate": 4.232437281929591e-06, + "loss": 0.3001, + "step": 22579, + "teacher_loss": 0.29051390290260315 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.4705466032028198, + "learning_rate": 4.2308561370504205e-06, + "loss": 0.1845, + "step": 22580, + "teacher_loss": 0.1527404934167862 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.25138893723487854, + "learning_rate": 4.229275239075433e-06, + "loss": 0.184, + "step": 22581, + "teacher_loss": 0.17651250958442688 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.8316800594329834, + "learning_rate": 4.227694588040875e-06, + "loss": 0.256, + "step": 22582, + "teacher_loss": 0.19201889634132385 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.24994195997714996, + "learning_rate": 4.22611418398298e-06, + "loss": 0.1854, + "step": 22583, + "teacher_loss": 0.1782335638999939 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.1789945363998413, + "learning_rate": 4.2245340269379835e-06, + "loss": 0.1409, + "step": 22584, + "teacher_loss": 0.1366989016532898 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.4202876389026642, + "learning_rate": 4.22295411694212e-06, + "loss": 0.1627, + "step": 22585, + "teacher_loss": 0.13408920168876648 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.5731959342956543, + "learning_rate": 4.2213744540316035e-06, + "loss": 0.2806, + "step": 22586, + "teacher_loss": 0.24806751310825348 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.20278418064117432, + "learning_rate": 4.219795038242655e-06, + "loss": 0.2284, + "step": 22587, + "teacher_loss": 0.2312992811203003 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.33175382018089294, + "learning_rate": 4.21821586961149e-06, + "loss": 0.1865, + "step": 22588, + "teacher_loss": 0.17041270434856415 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.19992820918560028, + "learning_rate": 4.216636948174308e-06, + "loss": 0.1539, + "step": 22589, + "teacher_loss": 0.14876753091812134 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.28158602118492126, + "learning_rate": 4.215058273967303e-06, + "loss": 0.2021, + "step": 22590, + "teacher_loss": 0.1932845264673233 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.7990930080413818, + "learning_rate": 4.213479847026686e-06, + "loss": 0.2769, + "step": 22591, + "teacher_loss": 0.21882659196853638 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.2985904812812805, + "learning_rate": 4.211901667388635e-06, + "loss": 0.2105, + "step": 22592, + "teacher_loss": 0.20071358978748322 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.41006115078926086, + "learning_rate": 4.210323735089332e-06, + "loss": 0.169, + "step": 22593, + "teacher_loss": 0.1421830952167511 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.26100900769233704, + "learning_rate": 4.2087460501649554e-06, + "loss": 0.194, + "step": 22594, + "teacher_loss": 0.18660786747932434 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.5923420190811157, + "learning_rate": 4.207168612651682e-06, + "loss": 0.2242, + "step": 22595, + "teacher_loss": 0.18331030011177063 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.5142922401428223, + "learning_rate": 4.20559142258567e-06, + "loss": 0.2751, + "step": 22596, + "teacher_loss": 0.24852584302425385 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.4100603759288788, + "learning_rate": 4.204014480003083e-06, + "loss": 0.2553, + "step": 22597, + "teacher_loss": 0.23806911706924438 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.1011427789926529, + "learning_rate": 4.202437784940082e-06, + "loss": 0.1133, + "step": 22598, + "teacher_loss": 0.11459873616695404 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.338405966758728, + "learning_rate": 4.200861337432809e-06, + "loss": 0.1848, + "step": 22599, + "teacher_loss": 0.16774345934391022 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.3041019141674042, + "learning_rate": 4.199285137517398e-06, + "loss": 0.1996, + "step": 22600, + "teacher_loss": 0.18803206086158752 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 1.1846622228622437, + "learning_rate": 4.197709185230005e-06, + "loss": 0.3567, + "step": 22601, + "teacher_loss": 0.2646849751472473 + }, + { + "compression_loss": 0.0, + "epoch": 4.08, + "label_loss": 0.24639245867729187, + "learning_rate": 4.196133480606755e-06, + "loss": 0.2016, + "step": 22602, + "teacher_loss": 0.19657376408576965 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.5760072469711304, + "learning_rate": 4.194558023683762e-06, + "loss": 0.2322, + "step": 22603, + "teacher_loss": 0.1939689815044403 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.5377370715141296, + "learning_rate": 4.1929828144971686e-06, + "loss": 0.1956, + "step": 22604, + "teacher_loss": 0.1575387716293335 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.2735484838485718, + "learning_rate": 4.191407853083076e-06, + "loss": 0.1553, + "step": 22605, + "teacher_loss": 0.1421223282814026 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.8876392841339111, + "learning_rate": 4.189833139477592e-06, + "loss": 0.2334, + "step": 22606, + "teacher_loss": 0.1607379913330078 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.4514651894569397, + "learning_rate": 4.188258673716823e-06, + "loss": 0.1799, + "step": 22607, + "teacher_loss": 0.14973172545433044 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.480631560087204, + "learning_rate": 4.186684455836873e-06, + "loss": 0.1757, + "step": 22608, + "teacher_loss": 0.14177730679512024 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.354866623878479, + "learning_rate": 4.1851104858738235e-06, + "loss": 0.2485, + "step": 22609, + "teacher_loss": 0.23664726316928864 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.3198975920677185, + "learning_rate": 4.1835367638637656e-06, + "loss": 0.1738, + "step": 22610, + "teacher_loss": 0.15756721794605255 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.3408806025981903, + "learning_rate": 4.1819632898427835e-06, + "loss": 0.1628, + "step": 22611, + "teacher_loss": 0.14302849769592285 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.7259741425514221, + "learning_rate": 4.1803900638469464e-06, + "loss": 0.304, + "step": 22612, + "teacher_loss": 0.2570857107639313 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.504450798034668, + "learning_rate": 4.1788170859123245e-06, + "loss": 0.2361, + "step": 22613, + "teacher_loss": 0.20633485913276672 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.5568752288818359, + "learning_rate": 4.177244356074989e-06, + "loss": 0.2157, + "step": 22614, + "teacher_loss": 0.1778205931186676 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.28598469495773315, + "learning_rate": 4.175671874370992e-06, + "loss": 0.1778, + "step": 22615, + "teacher_loss": 0.1658347249031067 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.6041390299797058, + "learning_rate": 4.1740996408363815e-06, + "loss": 0.2944, + "step": 22616, + "teacher_loss": 0.26003211736679077 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.2922152876853943, + "learning_rate": 4.172527655507208e-06, + "loss": 0.1728, + "step": 22617, + "teacher_loss": 0.15958666801452637 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.28521639108657837, + "learning_rate": 4.1709559184195176e-06, + "loss": 0.2226, + "step": 22618, + "teacher_loss": 0.21562987565994263 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.4649069309234619, + "learning_rate": 4.1693844296093364e-06, + "loss": 0.3082, + "step": 22619, + "teacher_loss": 0.2907332181930542 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.19252392649650574, + "learning_rate": 4.167813189112698e-06, + "loss": 0.1686, + "step": 22620, + "teacher_loss": 0.16591094434261322 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.27004939317703247, + "learning_rate": 4.166242196965632e-06, + "loss": 0.1527, + "step": 22621, + "teacher_loss": 0.13971218466758728 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.2916765511035919, + "learning_rate": 4.164671453204147e-06, + "loss": 0.1896, + "step": 22622, + "teacher_loss": 0.17824479937553406 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.2132805585861206, + "learning_rate": 4.163100957864258e-06, + "loss": 0.1523, + "step": 22623, + "teacher_loss": 0.14553149044513702 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.4529210925102234, + "learning_rate": 4.1615307109819785e-06, + "loss": 0.2653, + "step": 22624, + "teacher_loss": 0.24442198872566223 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.28582000732421875, + "learning_rate": 4.159960712593301e-06, + "loss": 0.2099, + "step": 22625, + "teacher_loss": 0.20150399208068848 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.5984143018722534, + "learning_rate": 4.1583909627342225e-06, + "loss": 0.1656, + "step": 22626, + "teacher_loss": 0.11748944222927094 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.6292798519134521, + "learning_rate": 4.156821461440739e-06, + "loss": 0.2242, + "step": 22627, + "teacher_loss": 0.1791575700044632 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.17908817529678345, + "learning_rate": 4.1552522087488255e-06, + "loss": 0.1565, + "step": 22628, + "teacher_loss": 0.15403644740581512 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.2232891470193863, + "learning_rate": 4.153683204694469e-06, + "loss": 0.1695, + "step": 22629, + "teacher_loss": 0.1635599434375763 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.6556723117828369, + "learning_rate": 4.152114449313634e-06, + "loss": 0.2409, + "step": 22630, + "teacher_loss": 0.19480371475219727 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.43753910064697266, + "learning_rate": 4.150545942642292e-06, + "loss": 0.1924, + "step": 22631, + "teacher_loss": 0.16517898440361023 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.12581047415733337, + "learning_rate": 4.148977684716405e-06, + "loss": 0.2022, + "step": 22632, + "teacher_loss": 0.21070173382759094 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.44460129737854004, + "learning_rate": 4.147409675571924e-06, + "loss": 0.2528, + "step": 22633, + "teacher_loss": 0.23150289058685303 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.3765431344509125, + "learning_rate": 4.1458419152448e-06, + "loss": 0.2948, + "step": 22634, + "teacher_loss": 0.28576457500457764 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.5270299911499023, + "learning_rate": 4.144274403770984e-06, + "loss": 0.2532, + "step": 22635, + "teacher_loss": 0.222722589969635 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.18115884065628052, + "learning_rate": 4.142707141186404e-06, + "loss": 0.2009, + "step": 22636, + "teacher_loss": 0.20312249660491943 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.27549487352371216, + "learning_rate": 4.141140127527002e-06, + "loss": 0.2176, + "step": 22637, + "teacher_loss": 0.21118830144405365 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.6282958984375, + "learning_rate": 4.139573362828695e-06, + "loss": 0.2048, + "step": 22638, + "teacher_loss": 0.15772756934165955 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.5627350807189941, + "learning_rate": 4.138006847127416e-06, + "loss": 0.2619, + "step": 22639, + "teacher_loss": 0.22848185896873474 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.6914810538291931, + "learning_rate": 4.136440580459068e-06, + "loss": 0.2715, + "step": 22640, + "teacher_loss": 0.224819153547287 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.47946155071258545, + "learning_rate": 4.134874562859568e-06, + "loss": 0.2389, + "step": 22641, + "teacher_loss": 0.2121821641921997 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.47780200839042664, + "learning_rate": 4.133308794364823e-06, + "loss": 0.2489, + "step": 22642, + "teacher_loss": 0.22352181375026703 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.24092620611190796, + "learning_rate": 4.131743275010721e-06, + "loss": 0.1475, + "step": 22643, + "teacher_loss": 0.13716727495193481 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.4969255030155182, + "learning_rate": 4.130178004833164e-06, + "loss": 0.2795, + "step": 22644, + "teacher_loss": 0.25529471039772034 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.38713592290878296, + "learning_rate": 4.1286129838680384e-06, + "loss": 0.1976, + "step": 22645, + "teacher_loss": 0.17654253542423248 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.4934028387069702, + "learning_rate": 4.127048212151218e-06, + "loss": 0.2459, + "step": 22646, + "teacher_loss": 0.21844777464866638 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.6083205342292786, + "learning_rate": 4.125483689718585e-06, + "loss": 0.2262, + "step": 22647, + "teacher_loss": 0.18374021351337433 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.26910436153411865, + "learning_rate": 4.123919416606011e-06, + "loss": 0.2132, + "step": 22648, + "teacher_loss": 0.20700596272945404 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.11657220125198364, + "learning_rate": 4.122355392849357e-06, + "loss": 0.1625, + "step": 22649, + "teacher_loss": 0.16759991645812988 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.41936200857162476, + "learning_rate": 4.120791618484471e-06, + "loss": 0.2536, + "step": 22650, + "teacher_loss": 0.23514056205749512 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.13410958647727966, + "learning_rate": 4.119228093547226e-06, + "loss": 0.1599, + "step": 22651, + "teacher_loss": 0.1627708077430725 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.5489732623100281, + "learning_rate": 4.1176648180734586e-06, + "loss": 0.2895, + "step": 22652, + "teacher_loss": 0.2606958746910095 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.4245118796825409, + "learning_rate": 4.1161017920989995e-06, + "loss": 0.2054, + "step": 22653, + "teacher_loss": 0.1810726821422577 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.39075595140457153, + "learning_rate": 4.114539015659705e-06, + "loss": 0.2362, + "step": 22654, + "teacher_loss": 0.21901929378509521 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.4327138066291809, + "learning_rate": 4.112976488791395e-06, + "loss": 0.2049, + "step": 22655, + "teacher_loss": 0.1796102523803711 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.2354084700345993, + "learning_rate": 4.111414211529888e-06, + "loss": 0.4395, + "step": 22656, + "teacher_loss": 0.46217358112335205 + }, + { + "compression_loss": 0.0, + "epoch": 4.09, + "label_loss": 0.5540645718574524, + "learning_rate": 4.109852183911006e-06, + "loss": 0.1891, + "step": 22657, + "teacher_loss": 0.1485649049282074 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.6115909218788147, + "learning_rate": 4.10829040597057e-06, + "loss": 0.2504, + "step": 22658, + "teacher_loss": 0.210235595703125 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.6111552119255066, + "learning_rate": 4.106728877744375e-06, + "loss": 0.2149, + "step": 22659, + "teacher_loss": 0.17083293199539185 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.33633461594581604, + "learning_rate": 4.105167599268225e-06, + "loss": 0.2319, + "step": 22660, + "teacher_loss": 0.22028425335884094 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.4442348778247833, + "learning_rate": 4.103606570577924e-06, + "loss": 0.2056, + "step": 22661, + "teacher_loss": 0.1791044920682907 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.1828872561454773, + "learning_rate": 4.102045791709254e-06, + "loss": 0.1835, + "step": 22662, + "teacher_loss": 0.18357203900814056 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.7625223994255066, + "learning_rate": 4.1004852626979925e-06, + "loss": 0.2631, + "step": 22663, + "teacher_loss": 0.20761752128601074 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.48668938875198364, + "learning_rate": 4.098924983579934e-06, + "loss": 0.1966, + "step": 22664, + "teacher_loss": 0.16431349515914917 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.478726327419281, + "learning_rate": 4.097364954390842e-06, + "loss": 0.1593, + "step": 22665, + "teacher_loss": 0.12385260313749313 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.2811603844165802, + "learning_rate": 4.09580517516648e-06, + "loss": 0.1598, + "step": 22666, + "teacher_loss": 0.14635232090950012 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.26371073722839355, + "learning_rate": 4.094245645942612e-06, + "loss": 0.2452, + "step": 22667, + "teacher_loss": 0.2431599497795105 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.5744708776473999, + "learning_rate": 4.092686366754999e-06, + "loss": 0.2374, + "step": 22668, + "teacher_loss": 0.19991913437843323 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.6082953214645386, + "learning_rate": 4.091127337639382e-06, + "loss": 0.1821, + "step": 22669, + "teacher_loss": 0.13474726676940918 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.4010860025882721, + "learning_rate": 4.089568558631508e-06, + "loss": 0.2208, + "step": 22670, + "teacher_loss": 0.20079299807548523 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.753595232963562, + "learning_rate": 4.088010029767122e-06, + "loss": 0.2459, + "step": 22671, + "teacher_loss": 0.18946939706802368 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.5105704665184021, + "learning_rate": 4.086451751081943e-06, + "loss": 0.1962, + "step": 22672, + "teacher_loss": 0.161228209733963 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.31743037700653076, + "learning_rate": 4.084893722611706e-06, + "loss": 0.1785, + "step": 22673, + "teacher_loss": 0.1631021350622177 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.40683621168136597, + "learning_rate": 4.083335944392135e-06, + "loss": 0.2543, + "step": 22674, + "teacher_loss": 0.2372998595237732 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.35393381118774414, + "learning_rate": 4.081778416458938e-06, + "loss": 0.2332, + "step": 22675, + "teacher_loss": 0.2197376787662506 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.3269878625869751, + "learning_rate": 4.0802211388478255e-06, + "loss": 0.2136, + "step": 22676, + "teacher_loss": 0.20097634196281433 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.4012044668197632, + "learning_rate": 4.078664111594507e-06, + "loss": 0.177, + "step": 22677, + "teacher_loss": 0.1520952433347702 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.4496087431907654, + "learning_rate": 4.077107334734679e-06, + "loss": 0.2525, + "step": 22678, + "teacher_loss": 0.23055025935173035 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.4206318259239197, + "learning_rate": 4.0755508083040245e-06, + "loss": 0.2715, + "step": 22679, + "teacher_loss": 0.2548922896385193 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.6155049800872803, + "learning_rate": 4.0739945323382376e-06, + "loss": 0.2044, + "step": 22680, + "teacher_loss": 0.15873342752456665 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.09657083451747894, + "learning_rate": 4.072438506873004e-06, + "loss": 0.1242, + "step": 22681, + "teacher_loss": 0.12722717225551605 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.20979011058807373, + "learning_rate": 4.070882731943987e-06, + "loss": 0.223, + "step": 22682, + "teacher_loss": 0.2244889736175537 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.30993878841400146, + "learning_rate": 4.069327207586862e-06, + "loss": 0.2749, + "step": 22683, + "teacher_loss": 0.270952045917511 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.220865860581398, + "learning_rate": 4.067771933837296e-06, + "loss": 0.1962, + "step": 22684, + "teacher_loss": 0.19348931312561035 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.23129618167877197, + "learning_rate": 4.06621691073094e-06, + "loss": 0.1934, + "step": 22685, + "teacher_loss": 0.1891534924507141 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.4420129656791687, + "learning_rate": 4.064662138303449e-06, + "loss": 0.1834, + "step": 22686, + "teacher_loss": 0.15463536977767944 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.22357898950576782, + "learning_rate": 4.063107616590473e-06, + "loss": 0.187, + "step": 22687, + "teacher_loss": 0.18298810720443726 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.5648127794265747, + "learning_rate": 4.0615533456276445e-06, + "loss": 0.2803, + "step": 22688, + "teacher_loss": 0.24868208169937134 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.6106789112091064, + "learning_rate": 4.059999325450608e-06, + "loss": 0.2372, + "step": 22689, + "teacher_loss": 0.19574488699436188 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.4297271966934204, + "learning_rate": 4.058445556094982e-06, + "loss": 0.2001, + "step": 22690, + "teacher_loss": 0.17457827925682068 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.4158221185207367, + "learning_rate": 4.056892037596394e-06, + "loss": 0.2336, + "step": 22691, + "teacher_loss": 0.21335706114768982 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.381158709526062, + "learning_rate": 4.055338769990468e-06, + "loss": 0.2417, + "step": 22692, + "teacher_loss": 0.22625300288200378 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.28542739152908325, + "learning_rate": 4.053785753312805e-06, + "loss": 0.1894, + "step": 22693, + "teacher_loss": 0.17867901921272278 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.38618004322052, + "learning_rate": 4.052232987599017e-06, + "loss": 0.186, + "step": 22694, + "teacher_loss": 0.16373300552368164 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.4946167469024658, + "learning_rate": 4.050680472884707e-06, + "loss": 0.2173, + "step": 22695, + "teacher_loss": 0.1865355521440506 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.6046112775802612, + "learning_rate": 4.049128209205462e-06, + "loss": 0.2102, + "step": 22696, + "teacher_loss": 0.1664019227027893 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.44900938868522644, + "learning_rate": 4.047576196596879e-06, + "loss": 0.3147, + "step": 22697, + "teacher_loss": 0.2998010516166687 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.3624172806739807, + "learning_rate": 4.046024435094534e-06, + "loss": 0.2158, + "step": 22698, + "teacher_loss": 0.19955873489379883 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.1988614946603775, + "learning_rate": 4.04447292473401e-06, + "loss": 0.1947, + "step": 22699, + "teacher_loss": 0.19424524903297424 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.11881519109010696, + "learning_rate": 4.04292166555087e-06, + "loss": 0.1309, + "step": 22700, + "teacher_loss": 0.13229748606681824 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.4090690612792969, + "learning_rate": 4.0413706575806865e-06, + "loss": 0.2289, + "step": 22701, + "teacher_loss": 0.2088746875524521 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.44665592908859253, + "learning_rate": 4.039819900859022e-06, + "loss": 0.1961, + "step": 22702, + "teacher_loss": 0.1682625263929367 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.6094774007797241, + "learning_rate": 4.038269395421424e-06, + "loss": 0.2056, + "step": 22703, + "teacher_loss": 0.16069424152374268 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.7952027320861816, + "learning_rate": 4.0367191413034425e-06, + "loss": 0.2461, + "step": 22704, + "teacher_loss": 0.18507659435272217 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.3751080334186554, + "learning_rate": 4.035169138540625e-06, + "loss": 0.1986, + "step": 22705, + "teacher_loss": 0.1789853870868683 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.32459545135498047, + "learning_rate": 4.033619387168502e-06, + "loss": 0.2188, + "step": 22706, + "teacher_loss": 0.2070883810520172 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.36693960428237915, + "learning_rate": 4.032069887222607e-06, + "loss": 0.2494, + "step": 22707, + "teacher_loss": 0.2363707572221756 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 1.1638574600219727, + "learning_rate": 4.030520638738471e-06, + "loss": 0.3754, + "step": 22708, + "teacher_loss": 0.2877826690673828 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.17444932460784912, + "learning_rate": 4.0289716417516035e-06, + "loss": 0.1735, + "step": 22709, + "teacher_loss": 0.1733928620815277 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.3411153554916382, + "learning_rate": 4.0274228962975235e-06, + "loss": 0.1777, + "step": 22710, + "teacher_loss": 0.1595703512430191 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.36173027753829956, + "learning_rate": 4.0258744024117435e-06, + "loss": 0.1902, + "step": 22711, + "teacher_loss": 0.1711013913154602 + }, + { + "compression_loss": 0.0, + "epoch": 4.1, + "label_loss": 0.2393421232700348, + "learning_rate": 4.024326160129761e-06, + "loss": 0.2, + "step": 22712, + "teacher_loss": 0.19560891389846802 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.4350154995918274, + "learning_rate": 4.022778169487063e-06, + "loss": 0.2314, + "step": 22713, + "teacher_loss": 0.20881511270999908 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.2271440625190735, + "learning_rate": 4.021230430519161e-06, + "loss": 0.1387, + "step": 22714, + "teacher_loss": 0.12888216972351074 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.60682612657547, + "learning_rate": 4.0196829432615285e-06, + "loss": 0.2075, + "step": 22715, + "teacher_loss": 0.1631086766719818 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.15723320841789246, + "learning_rate": 4.01813570774964e-06, + "loss": 0.1409, + "step": 22716, + "teacher_loss": 0.1391250193119049 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.44588929414749146, + "learning_rate": 4.016588724018976e-06, + "loss": 0.228, + "step": 22717, + "teacher_loss": 0.20374518632888794 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.4236931800842285, + "learning_rate": 4.015041992105005e-06, + "loss": 0.2333, + "step": 22718, + "teacher_loss": 0.2121088206768036 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.4152297377586365, + "learning_rate": 4.013495512043183e-06, + "loss": 0.1781, + "step": 22719, + "teacher_loss": 0.15177898108959198 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.33221930265426636, + "learning_rate": 4.01194928386897e-06, + "loss": 0.1533, + "step": 22720, + "teacher_loss": 0.13343505561351776 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.5556588172912598, + "learning_rate": 4.010403307617821e-06, + "loss": 0.2573, + "step": 22721, + "teacher_loss": 0.22420406341552734 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.42407265305519104, + "learning_rate": 4.008857583325175e-06, + "loss": 0.1803, + "step": 22722, + "teacher_loss": 0.15325365960597992 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.47183385491371155, + "learning_rate": 4.007312111026462e-06, + "loss": 0.2451, + "step": 22723, + "teacher_loss": 0.21991555392742157 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.1616220325231552, + "learning_rate": 4.005766890757135e-06, + "loss": 0.1932, + "step": 22724, + "teacher_loss": 0.19666370749473572 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.48040449619293213, + "learning_rate": 4.004221922552608e-06, + "loss": 0.1986, + "step": 22725, + "teacher_loss": 0.1672540009021759 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.8530826568603516, + "learning_rate": 4.002677206448299e-06, + "loss": 0.2626, + "step": 22726, + "teacher_loss": 0.19703079760074615 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.2912660241127014, + "learning_rate": 4.001132742479639e-06, + "loss": 0.1731, + "step": 22727, + "teacher_loss": 0.15999960899353027 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.14258864521980286, + "learning_rate": 3.999588530682028e-06, + "loss": 0.1456, + "step": 22728, + "teacher_loss": 0.1459406316280365 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.2268160879611969, + "learning_rate": 3.998044571090866e-06, + "loss": 0.2319, + "step": 22729, + "teacher_loss": 0.23244377970695496 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.5110557079315186, + "learning_rate": 3.996500863741556e-06, + "loss": 0.2084, + "step": 22730, + "teacher_loss": 0.17476025223731995 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.4039493203163147, + "learning_rate": 3.994957408669497e-06, + "loss": 0.2916, + "step": 22731, + "teacher_loss": 0.2791168987751007 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.2917608618736267, + "learning_rate": 3.993414205910064e-06, + "loss": 0.1977, + "step": 22732, + "teacher_loss": 0.18725427985191345 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.38904404640197754, + "learning_rate": 3.991871255498645e-06, + "loss": 0.2176, + "step": 22733, + "teacher_loss": 0.19860002398490906 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.33132603764533997, + "learning_rate": 3.990328557470619e-06, + "loss": 0.1764, + "step": 22734, + "teacher_loss": 0.15921461582183838 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.48762670159339905, + "learning_rate": 3.9887861118613435e-06, + "loss": 0.1866, + "step": 22735, + "teacher_loss": 0.15316076576709747 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.30529311299324036, + "learning_rate": 3.987243918706191e-06, + "loss": 0.1561, + "step": 22736, + "teacher_loss": 0.1394929140806198 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.13172733783721924, + "learning_rate": 3.985701978040522e-06, + "loss": 0.1796, + "step": 22737, + "teacher_loss": 0.18486396968364716 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.6441196203231812, + "learning_rate": 3.98416028989968e-06, + "loss": 0.2156, + "step": 22738, + "teacher_loss": 0.1680314689874649 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.2546910047531128, + "learning_rate": 3.982618854319018e-06, + "loss": 0.2944, + "step": 22739, + "teacher_loss": 0.2988295257091522 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 1.2795207500457764, + "learning_rate": 3.981077671333871e-06, + "loss": 0.3757, + "step": 22740, + "teacher_loss": 0.2752285599708557 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.4286991357803345, + "learning_rate": 3.9795367409795805e-06, + "loss": 0.1569, + "step": 22741, + "teacher_loss": 0.12666790187358856 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.31686174869537354, + "learning_rate": 3.977996063291465e-06, + "loss": 0.2039, + "step": 22742, + "teacher_loss": 0.19130294024944305 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.24156928062438965, + "learning_rate": 3.9764556383048575e-06, + "loss": 0.1563, + "step": 22743, + "teacher_loss": 0.1468181014060974 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.6340640783309937, + "learning_rate": 3.974915466055075e-06, + "loss": 0.2176, + "step": 22744, + "teacher_loss": 0.17128895223140717 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.2559596002101898, + "learning_rate": 3.9733755465774215e-06, + "loss": 0.2066, + "step": 22745, + "teacher_loss": 0.20114782452583313 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.4445616602897644, + "learning_rate": 3.971835879907206e-06, + "loss": 0.1895, + "step": 22746, + "teacher_loss": 0.1611877679824829 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.2750485837459564, + "learning_rate": 3.970296466079735e-06, + "loss": 0.1833, + "step": 22747, + "teacher_loss": 0.17309531569480896 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.4285891056060791, + "learning_rate": 3.968757305130294e-06, + "loss": 0.2206, + "step": 22748, + "teacher_loss": 0.19751664996147156 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.37817591428756714, + "learning_rate": 3.967218397094172e-06, + "loss": 0.1622, + "step": 22749, + "teacher_loss": 0.1381799280643463 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.7025238275527954, + "learning_rate": 3.9656797420066615e-06, + "loss": 0.2427, + "step": 22750, + "teacher_loss": 0.19163858890533447 + }, + { + "epoch": 4.11, + "eval_exact_match": 80.2554399243141, + "eval_f1": 87.68513528843437, + "step": 22750 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.23433294892311096, + "learning_rate": 3.964141339903026e-06, + "loss": 0.1949, + "step": 22751, + "teacher_loss": 0.19053223729133606 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.5709946155548096, + "learning_rate": 3.962603190818547e-06, + "loss": 0.2487, + "step": 22752, + "teacher_loss": 0.2128649204969406 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.29584360122680664, + "learning_rate": 3.96106529478848e-06, + "loss": 0.1939, + "step": 22753, + "teacher_loss": 0.18259459733963013 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.28943800926208496, + "learning_rate": 3.95952765184809e-06, + "loss": 0.2008, + "step": 22754, + "teacher_loss": 0.19097205996513367 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.37591415643692017, + "learning_rate": 3.957990262032633e-06, + "loss": 0.2498, + "step": 22755, + "teacher_loss": 0.235824316740036 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.24264061450958252, + "learning_rate": 3.956453125377351e-06, + "loss": 0.2097, + "step": 22756, + "teacher_loss": 0.20604225993156433 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.2767988443374634, + "learning_rate": 3.9549162419174874e-06, + "loss": 0.3605, + "step": 22757, + "teacher_loss": 0.3698080778121948 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.11844907701015472, + "learning_rate": 3.9533796116882845e-06, + "loss": 0.1535, + "step": 22758, + "teacher_loss": 0.15740500390529633 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.7088150978088379, + "learning_rate": 3.9518432347249636e-06, + "loss": 0.2692, + "step": 22759, + "teacher_loss": 0.22033682465553284 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.4092136323451996, + "learning_rate": 3.950307111062757e-06, + "loss": 0.1963, + "step": 22760, + "teacher_loss": 0.17268610000610352 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.20961187779903412, + "learning_rate": 3.9487712407368755e-06, + "loss": 0.1876, + "step": 22761, + "teacher_loss": 0.18516525626182556 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.4993930757045746, + "learning_rate": 3.94723562378254e-06, + "loss": 0.219, + "step": 22762, + "teacher_loss": 0.18780581653118134 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.19580087065696716, + "learning_rate": 3.94570026023495e-06, + "loss": 0.2031, + "step": 22763, + "teacher_loss": 0.20387354493141174 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.4121062755584717, + "learning_rate": 3.944165150129311e-06, + "loss": 0.2086, + "step": 22764, + "teacher_loss": 0.18594354391098022 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.4467495083808899, + "learning_rate": 3.942630293500821e-06, + "loss": 0.2086, + "step": 22765, + "teacher_loss": 0.1821470707654953 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.7759133577346802, + "learning_rate": 3.941095690384664e-06, + "loss": 0.2585, + "step": 22766, + "teacher_loss": 0.2009628713130951 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.4497264623641968, + "learning_rate": 3.939561340816024e-06, + "loss": 0.2379, + "step": 22767, + "teacher_loss": 0.21431520581245422 + }, + { + "compression_loss": 0.0, + "epoch": 4.11, + "label_loss": 0.2579313814640045, + "learning_rate": 3.9380272448300884e-06, + "loss": 0.1993, + "step": 22768, + "teacher_loss": 0.192830890417099 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.5114196538925171, + "learning_rate": 3.9364934024620166e-06, + "loss": 0.2299, + "step": 22769, + "teacher_loss": 0.19863177835941315 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.32903820276260376, + "learning_rate": 3.934959813746981e-06, + "loss": 0.192, + "step": 22770, + "teacher_loss": 0.17679116129875183 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.8595058917999268, + "learning_rate": 3.9334264787201474e-06, + "loss": 0.3886, + "step": 22771, + "teacher_loss": 0.33622950315475464 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.24169015884399414, + "learning_rate": 3.931893397416666e-06, + "loss": 0.1358, + "step": 22772, + "teacher_loss": 0.12403303384780884 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.32074272632598877, + "learning_rate": 3.930360569871676e-06, + "loss": 0.1868, + "step": 22773, + "teacher_loss": 0.1718747317790985 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.47327497601509094, + "learning_rate": 3.928827996120336e-06, + "loss": 0.2836, + "step": 22774, + "teacher_loss": 0.2625022530555725 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.38969460129737854, + "learning_rate": 3.927295676197779e-06, + "loss": 0.2707, + "step": 22775, + "teacher_loss": 0.25752925872802734 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.26623645424842834, + "learning_rate": 3.9257636101391265e-06, + "loss": 0.1638, + "step": 22776, + "teacher_loss": 0.15239296853542328 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.3344690203666687, + "learning_rate": 3.924231797979519e-06, + "loss": 0.2531, + "step": 22777, + "teacher_loss": 0.2441120445728302 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.404674768447876, + "learning_rate": 3.9227002397540705e-06, + "loss": 0.3003, + "step": 22778, + "teacher_loss": 0.28875136375427246 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.463502436876297, + "learning_rate": 3.921168935497889e-06, + "loss": 0.1806, + "step": 22779, + "teacher_loss": 0.14912721514701843 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.24268566071987152, + "learning_rate": 3.919637885246089e-06, + "loss": 0.123, + "step": 22780, + "teacher_loss": 0.10965050011873245 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.20628032088279724, + "learning_rate": 3.918107089033776e-06, + "loss": 0.2183, + "step": 22781, + "teacher_loss": 0.21966543793678284 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.28634029626846313, + "learning_rate": 3.916576546896035e-06, + "loss": 0.1698, + "step": 22782, + "teacher_loss": 0.15685084462165833 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.6188716888427734, + "learning_rate": 3.915046258867966e-06, + "loss": 0.2637, + "step": 22783, + "teacher_loss": 0.22423511743545532 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.49091142416000366, + "learning_rate": 3.913516224984658e-06, + "loss": 0.2025, + "step": 22784, + "teacher_loss": 0.17049023509025574 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.5912739038467407, + "learning_rate": 3.911986445281182e-06, + "loss": 0.2041, + "step": 22785, + "teacher_loss": 0.16108059883117676 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.5654439926147461, + "learning_rate": 3.9104569197926045e-06, + "loss": 0.2746, + "step": 22786, + "teacher_loss": 0.24225589632987976 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.4637376666069031, + "learning_rate": 3.90892764855401e-06, + "loss": 0.1675, + "step": 22787, + "teacher_loss": 0.13463382422924042 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.5043166279792786, + "learning_rate": 3.907398631600451e-06, + "loss": 0.2049, + "step": 22788, + "teacher_loss": 0.1715872436761856 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.6082807779312134, + "learning_rate": 3.905869868966982e-06, + "loss": 0.2742, + "step": 22789, + "teacher_loss": 0.23704344034194946 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.78838050365448, + "learning_rate": 3.904341360688654e-06, + "loss": 0.2434, + "step": 22790, + "teacher_loss": 0.18289171159267426 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.4148459732532501, + "learning_rate": 3.9028131068005165e-06, + "loss": 0.237, + "step": 22791, + "teacher_loss": 0.21728824079036713 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.22851252555847168, + "learning_rate": 3.901285107337599e-06, + "loss": 0.1513, + "step": 22792, + "teacher_loss": 0.1427510380744934 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.7189049124717712, + "learning_rate": 3.8997573623349385e-06, + "loss": 0.3398, + "step": 22793, + "teacher_loss": 0.29772549867630005 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.25955629348754883, + "learning_rate": 3.898229871827565e-06, + "loss": 0.2074, + "step": 22794, + "teacher_loss": 0.20163947343826294 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.6709094643592834, + "learning_rate": 3.896702635850493e-06, + "loss": 0.3215, + "step": 22795, + "teacher_loss": 0.28268611431121826 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.7042547464370728, + "learning_rate": 3.895175654438738e-06, + "loss": 0.2652, + "step": 22796, + "teacher_loss": 0.21640561521053314 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.2703486680984497, + "learning_rate": 3.893648927627318e-06, + "loss": 0.1632, + "step": 22797, + "teacher_loss": 0.15126243233680725 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.45809632539749146, + "learning_rate": 3.892122455451224e-06, + "loss": 0.2443, + "step": 22798, + "teacher_loss": 0.22059085965156555 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.48233088850975037, + "learning_rate": 3.890596237945458e-06, + "loss": 0.221, + "step": 22799, + "teacher_loss": 0.19200804829597473 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.22528111934661865, + "learning_rate": 3.889070275145018e-06, + "loss": 0.2123, + "step": 22800, + "teacher_loss": 0.21080918610095978 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.25084811449050903, + "learning_rate": 3.887544567084884e-06, + "loss": 0.1747, + "step": 22801, + "teacher_loss": 0.16629287600517273 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.2577691078186035, + "learning_rate": 3.886019113800031e-06, + "loss": 0.1868, + "step": 22802, + "teacher_loss": 0.17886883020401 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.3990160822868347, + "learning_rate": 3.884493915325439e-06, + "loss": 0.3796, + "step": 22803, + "teacher_loss": 0.37743836641311646 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.1225518211722374, + "learning_rate": 3.882968971696081e-06, + "loss": 0.1391, + "step": 22804, + "teacher_loss": 0.14093998074531555 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.35597896575927734, + "learning_rate": 3.881444282946908e-06, + "loss": 0.1923, + "step": 22805, + "teacher_loss": 0.17415188252925873 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.47772926092147827, + "learning_rate": 3.8799198491128835e-06, + "loss": 0.3342, + "step": 22806, + "teacher_loss": 0.318303644657135 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.36855238676071167, + "learning_rate": 3.87839567022896e-06, + "loss": 0.234, + "step": 22807, + "teacher_loss": 0.21901264786720276 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.3471972942352295, + "learning_rate": 3.876871746330077e-06, + "loss": 0.1795, + "step": 22808, + "teacher_loss": 0.16090184450149536 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.5534819960594177, + "learning_rate": 3.875348077451174e-06, + "loss": 0.2557, + "step": 22809, + "teacher_loss": 0.22263109683990479 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.7976883053779602, + "learning_rate": 3.873824663627193e-06, + "loss": 0.2215, + "step": 22810, + "teacher_loss": 0.1574346125125885 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.42875123023986816, + "learning_rate": 3.872301504893047e-06, + "loss": 0.1958, + "step": 22811, + "teacher_loss": 0.1699080765247345 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.46442925930023193, + "learning_rate": 3.870778601283671e-06, + "loss": 0.2063, + "step": 22812, + "teacher_loss": 0.1776617467403412 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.13106337189674377, + "learning_rate": 3.869255952833971e-06, + "loss": 0.1399, + "step": 22813, + "teacher_loss": 0.14090678095817566 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.7380800843238831, + "learning_rate": 3.8677335595788595e-06, + "loss": 0.2317, + "step": 22814, + "teacher_loss": 0.17545756697654724 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.4530131220817566, + "learning_rate": 3.866211421553245e-06, + "loss": 0.2108, + "step": 22815, + "teacher_loss": 0.18388238549232483 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.3775239586830139, + "learning_rate": 3.864689538792017e-06, + "loss": 0.1762, + "step": 22816, + "teacher_loss": 0.15387769043445587 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.2615325450897217, + "learning_rate": 3.863167911330074e-06, + "loss": 0.2206, + "step": 22817, + "teacher_loss": 0.2160816192626953 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.39904022216796875, + "learning_rate": 3.861646539202304e-06, + "loss": 0.2071, + "step": 22818, + "teacher_loss": 0.18580499291419983 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.3116586208343506, + "learning_rate": 3.860125422443581e-06, + "loss": 0.1726, + "step": 22819, + "teacher_loss": 0.15717604756355286 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.12355434894561768, + "learning_rate": 3.858604561088782e-06, + "loss": 0.182, + "step": 22820, + "teacher_loss": 0.18849727511405945 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.24400505423545837, + "learning_rate": 3.857083955172782e-06, + "loss": 0.178, + "step": 22821, + "teacher_loss": 0.17069856822490692 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.42347875237464905, + "learning_rate": 3.855563604730439e-06, + "loss": 0.1899, + "step": 22822, + "teacher_loss": 0.1639215052127838 + }, + { + "compression_loss": 0.0, + "epoch": 4.12, + "label_loss": 0.3387252986431122, + "learning_rate": 3.854043509796604e-06, + "loss": 0.2224, + "step": 22823, + "teacher_loss": 0.20945142209529877 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.3266870975494385, + "learning_rate": 3.852523670406136e-06, + "loss": 0.1944, + "step": 22824, + "teacher_loss": 0.1796647608280182 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.4827834367752075, + "learning_rate": 3.851004086593881e-06, + "loss": 0.199, + "step": 22825, + "teacher_loss": 0.16752278804779053 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.23960530757904053, + "learning_rate": 3.849484758394671e-06, + "loss": 0.1918, + "step": 22826, + "teacher_loss": 0.18652094900608063 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.7126004695892334, + "learning_rate": 3.847965685843347e-06, + "loss": 0.312, + "step": 22827, + "teacher_loss": 0.2674831748008728 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.9634597897529602, + "learning_rate": 3.846446868974737e-06, + "loss": 0.3058, + "step": 22828, + "teacher_loss": 0.23272985219955444 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.36815857887268066, + "learning_rate": 3.844928307823655e-06, + "loss": 0.1865, + "step": 22829, + "teacher_loss": 0.1662788689136505 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.09020170569419861, + "learning_rate": 3.843410002424924e-06, + "loss": 0.1371, + "step": 22830, + "teacher_loss": 0.14226830005645752 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.724023699760437, + "learning_rate": 3.841891952813356e-06, + "loss": 0.2743, + "step": 22831, + "teacher_loss": 0.2242925763130188 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.3707645833492279, + "learning_rate": 3.840374159023747e-06, + "loss": 0.2122, + "step": 22832, + "teacher_loss": 0.19460159540176392 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.20390814542770386, + "learning_rate": 3.838856621090902e-06, + "loss": 0.2017, + "step": 22833, + "teacher_loss": 0.20148205757141113 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.25877898931503296, + "learning_rate": 3.837339339049615e-06, + "loss": 0.2446, + "step": 22834, + "teacher_loss": 0.2429705262184143 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.30381494760513306, + "learning_rate": 3.835822312934669e-06, + "loss": 0.2126, + "step": 22835, + "teacher_loss": 0.20250967144966125 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.3398551046848297, + "learning_rate": 3.834305542780837e-06, + "loss": 0.1522, + "step": 22836, + "teacher_loss": 0.13137857615947723 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.45063066482543945, + "learning_rate": 3.832789028622911e-06, + "loss": 0.187, + "step": 22837, + "teacher_loss": 0.1576966643333435 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.40712884068489075, + "learning_rate": 3.831272770495653e-06, + "loss": 0.2062, + "step": 22838, + "teacher_loss": 0.18389661610126495 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.2704983353614807, + "learning_rate": 3.82975676843382e-06, + "loss": 0.1996, + "step": 22839, + "teacher_loss": 0.1917600929737091 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.3123886287212372, + "learning_rate": 3.828241022472172e-06, + "loss": 0.1514, + "step": 22840, + "teacher_loss": 0.13354891538619995 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.530829668045044, + "learning_rate": 3.82672553264547e-06, + "loss": 0.2528, + "step": 22841, + "teacher_loss": 0.2219436764717102 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.5389578342437744, + "learning_rate": 3.825210298988445e-06, + "loss": 0.3024, + "step": 22842, + "teacher_loss": 0.2761152982711792 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.33514469861984253, + "learning_rate": 3.823695321535847e-06, + "loss": 0.2534, + "step": 22843, + "teacher_loss": 0.2442653328180313 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.1748175024986267, + "learning_rate": 3.822180600322409e-06, + "loss": 0.1917, + "step": 22844, + "teacher_loss": 0.19353771209716797 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.44219738245010376, + "learning_rate": 3.820666135382858e-06, + "loss": 0.3572, + "step": 22845, + "teacher_loss": 0.3477065861225128 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.2006331980228424, + "learning_rate": 3.819151926751906e-06, + "loss": 0.1618, + "step": 22846, + "teacher_loss": 0.1574380099773407 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.22027191519737244, + "learning_rate": 3.817637974464288e-06, + "loss": 0.1944, + "step": 22847, + "teacher_loss": 0.19151535630226135 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.463347464799881, + "learning_rate": 3.816124278554705e-06, + "loss": 0.433, + "step": 22848, + "teacher_loss": 0.42962202429771423 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.8238054513931274, + "learning_rate": 3.8146108390578514e-06, + "loss": 0.2932, + "step": 22849, + "teacher_loss": 0.23424723744392395 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.344815731048584, + "learning_rate": 3.8130976560084444e-06, + "loss": 0.1693, + "step": 22850, + "teacher_loss": 0.1497804820537567 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.5286739468574524, + "learning_rate": 3.81158472944117e-06, + "loss": 0.1961, + "step": 22851, + "teacher_loss": 0.15917927026748657 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.31880074739456177, + "learning_rate": 3.8100720593907064e-06, + "loss": 0.2061, + "step": 22852, + "teacher_loss": 0.1935739815235138 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.6812395453453064, + "learning_rate": 3.8085596458917426e-06, + "loss": 0.2918, + "step": 22853, + "teacher_loss": 0.24853505194187164 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.4880143404006958, + "learning_rate": 3.8070474889789562e-06, + "loss": 0.1902, + "step": 22854, + "teacher_loss": 0.1571383774280548 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.5628313422203064, + "learning_rate": 3.8055355886870095e-06, + "loss": 0.1974, + "step": 22855, + "teacher_loss": 0.15676581859588623 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.38051837682724, + "learning_rate": 3.804023945050568e-06, + "loss": 0.1904, + "step": 22856, + "teacher_loss": 0.16924715042114258 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.20348265767097473, + "learning_rate": 3.802512558104294e-06, + "loss": 0.1524, + "step": 22857, + "teacher_loss": 0.14675509929656982 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.5313616991043091, + "learning_rate": 3.8010014278828314e-06, + "loss": 0.3359, + "step": 22858, + "teacher_loss": 0.3142049312591553 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.4730963110923767, + "learning_rate": 3.799490554420831e-06, + "loss": 0.2449, + "step": 22859, + "teacher_loss": 0.2194899618625641 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.2631877660751343, + "learning_rate": 3.797979937752933e-06, + "loss": 0.178, + "step": 22860, + "teacher_loss": 0.16854281723499298 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.561118483543396, + "learning_rate": 3.796469577913768e-06, + "loss": 0.2917, + "step": 22861, + "teacher_loss": 0.26181483268737793 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.5119645595550537, + "learning_rate": 3.794959474937969e-06, + "loss": 0.2723, + "step": 22862, + "teacher_loss": 0.24570375680923462 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.20612607896327972, + "learning_rate": 3.7934496288601493e-06, + "loss": 0.1487, + "step": 22863, + "teacher_loss": 0.14233949780464172 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.3395610749721527, + "learning_rate": 3.7919400397149366e-06, + "loss": 0.204, + "step": 22864, + "teacher_loss": 0.18889927864074707 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.4405629634857178, + "learning_rate": 3.79043070753693e-06, + "loss": 0.2613, + "step": 22865, + "teacher_loss": 0.24136215448379517 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.8527287244796753, + "learning_rate": 3.7889216323607394e-06, + "loss": 0.3814, + "step": 22866, + "teacher_loss": 0.3290061354637146 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.39464664459228516, + "learning_rate": 3.787412814220968e-06, + "loss": 0.2057, + "step": 22867, + "teacher_loss": 0.18473097681999207 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.3150639533996582, + "learning_rate": 3.7859042531521975e-06, + "loss": 0.2089, + "step": 22868, + "teacher_loss": 0.19715292751789093 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.34170639514923096, + "learning_rate": 3.784395949189024e-06, + "loss": 0.3892, + "step": 22869, + "teacher_loss": 0.3944254517555237 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.430134117603302, + "learning_rate": 3.7828879023660277e-06, + "loss": 0.1757, + "step": 22870, + "teacher_loss": 0.14740820229053497 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.48510220646858215, + "learning_rate": 3.7813801127177768e-06, + "loss": 0.1808, + "step": 22871, + "teacher_loss": 0.14698204398155212 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.40084099769592285, + "learning_rate": 3.779872580278844e-06, + "loss": 0.1797, + "step": 22872, + "teacher_loss": 0.1551511287689209 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.3540598750114441, + "learning_rate": 3.7783653050837994e-06, + "loss": 0.174, + "step": 22873, + "teacher_loss": 0.1540299355983734 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.2955169975757599, + "learning_rate": 3.7768582871671884e-06, + "loss": 0.1625, + "step": 22874, + "teacher_loss": 0.14776170253753662 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.4249776601791382, + "learning_rate": 3.7753515265635734e-06, + "loss": 0.23, + "step": 22875, + "teacher_loss": 0.20830760896205902 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.40203937888145447, + "learning_rate": 3.77384502330749e-06, + "loss": 0.1452, + "step": 22876, + "teacher_loss": 0.11671268939971924 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.740081250667572, + "learning_rate": 3.772338777433482e-06, + "loss": 0.2306, + "step": 22877, + "teacher_loss": 0.1739429086446762 + }, + { + "compression_loss": 0.0, + "epoch": 4.13, + "label_loss": 0.4419202506542206, + "learning_rate": 3.770832788976089e-06, + "loss": 0.1772, + "step": 22878, + "teacher_loss": 0.14783495664596558 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.5161177515983582, + "learning_rate": 3.769327057969828e-06, + "loss": 0.2114, + "step": 22879, + "teacher_loss": 0.17749135196208954 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.2567833960056305, + "learning_rate": 3.7678215844492283e-06, + "loss": 0.2184, + "step": 22880, + "teacher_loss": 0.21409842371940613 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.6549907922744751, + "learning_rate": 3.766316368448805e-06, + "loss": 0.2624, + "step": 22881, + "teacher_loss": 0.218819722533226 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.16947561502456665, + "learning_rate": 3.7648114100030646e-06, + "loss": 0.1236, + "step": 22882, + "teacher_loss": 0.11849543452262878 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.5413275957107544, + "learning_rate": 3.763306709146514e-06, + "loss": 0.2351, + "step": 22883, + "teacher_loss": 0.20107589662075043 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.5842666625976562, + "learning_rate": 3.761802265913654e-06, + "loss": 0.2478, + "step": 22884, + "teacher_loss": 0.21043440699577332 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.4456090033054352, + "learning_rate": 3.760298080338976e-06, + "loss": 0.2377, + "step": 22885, + "teacher_loss": 0.2146110236644745 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.6545077562332153, + "learning_rate": 3.7587941524569597e-06, + "loss": 0.381, + "step": 22886, + "teacher_loss": 0.35060250759124756 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.5028231739997864, + "learning_rate": 3.7572904823020896e-06, + "loss": 0.2214, + "step": 22887, + "teacher_loss": 0.19009234011173248 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.36871349811553955, + "learning_rate": 3.755787069908847e-06, + "loss": 0.1863, + "step": 22888, + "teacher_loss": 0.16598603129386902 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.5215179920196533, + "learning_rate": 3.7542839153116903e-06, + "loss": 0.2131, + "step": 22889, + "teacher_loss": 0.17879268527030945 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.7782065868377686, + "learning_rate": 3.7527810185450855e-06, + "loss": 0.312, + "step": 22890, + "teacher_loss": 0.26017823815345764 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.5057734847068787, + "learning_rate": 3.751278379643497e-06, + "loss": 0.1958, + "step": 22891, + "teacher_loss": 0.16130372881889343 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.34362465143203735, + "learning_rate": 3.749775998641365e-06, + "loss": 0.2704, + "step": 22892, + "teacher_loss": 0.2622174024581909 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.26865124702453613, + "learning_rate": 3.7482738755731406e-06, + "loss": 0.2012, + "step": 22893, + "teacher_loss": 0.19371531903743744 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.5001659989356995, + "learning_rate": 3.7467720104732646e-06, + "loss": 0.187, + "step": 22894, + "teacher_loss": 0.15225905179977417 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.45260554552078247, + "learning_rate": 3.7452704033761676e-06, + "loss": 0.212, + "step": 22895, + "teacher_loss": 0.1852482259273529 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.21389439702033997, + "learning_rate": 3.743769054316269e-06, + "loss": 0.223, + "step": 22896, + "teacher_loss": 0.22405506670475006 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.4227154850959778, + "learning_rate": 3.742267963328006e-06, + "loss": 0.1887, + "step": 22897, + "teacher_loss": 0.1626890003681183 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.1892571747303009, + "learning_rate": 3.7407671304457865e-06, + "loss": 0.1734, + "step": 22898, + "teacher_loss": 0.1715894639492035 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 1.0329095125198364, + "learning_rate": 3.739266555704011e-06, + "loss": 0.2939, + "step": 22899, + "teacher_loss": 0.21182504296302795 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.413347065448761, + "learning_rate": 3.7377662391371e-06, + "loss": 0.2234, + "step": 22900, + "teacher_loss": 0.20233330130577087 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.2679764926433563, + "learning_rate": 3.7362661807794446e-06, + "loss": 0.1821, + "step": 22901, + "teacher_loss": 0.17258279025554657 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.6176490187644958, + "learning_rate": 3.7347663806654293e-06, + "loss": 0.2498, + "step": 22902, + "teacher_loss": 0.2089250683784485 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.38428351283073425, + "learning_rate": 3.733266838829448e-06, + "loss": 0.1631, + "step": 22903, + "teacher_loss": 0.13850151002407074 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.4436950087547302, + "learning_rate": 3.731767555305881e-06, + "loss": 0.1846, + "step": 22904, + "teacher_loss": 0.15581044554710388 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.7048033475875854, + "learning_rate": 3.730268530129097e-06, + "loss": 0.2498, + "step": 22905, + "teacher_loss": 0.19924427568912506 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.4116630256175995, + "learning_rate": 3.728769763333467e-06, + "loss": 0.1368, + "step": 22906, + "teacher_loss": 0.10622194409370422 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.2822698652744293, + "learning_rate": 3.7272712549533568e-06, + "loss": 0.179, + "step": 22907, + "teacher_loss": 0.1675695925951004 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.32546940445899963, + "learning_rate": 3.725773005023121e-06, + "loss": 0.2118, + "step": 22908, + "teacher_loss": 0.19911660254001617 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.5028215050697327, + "learning_rate": 3.7242750135770975e-06, + "loss": 0.2337, + "step": 22909, + "teacher_loss": 0.20378778874874115 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.2757541239261627, + "learning_rate": 3.7227772806496526e-06, + "loss": 0.2101, + "step": 22910, + "teacher_loss": 0.20283447206020355 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.5202866196632385, + "learning_rate": 3.7212798062751113e-06, + "loss": 0.2349, + "step": 22911, + "teacher_loss": 0.20316708087921143 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.5867432951927185, + "learning_rate": 3.719782590487807e-06, + "loss": 0.2634, + "step": 22912, + "teacher_loss": 0.22745990753173828 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.09419476240873337, + "learning_rate": 3.7182856333220664e-06, + "loss": 0.1783, + "step": 22913, + "teacher_loss": 0.1876555234193802 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.13743966817855835, + "learning_rate": 3.7167889348122165e-06, + "loss": 0.2056, + "step": 22914, + "teacher_loss": 0.21314075589179993 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.2192869931459427, + "learning_rate": 3.715292494992562e-06, + "loss": 0.1874, + "step": 22915, + "teacher_loss": 0.18389210104942322 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.34884750843048096, + "learning_rate": 3.7137963138974195e-06, + "loss": 0.2103, + "step": 22916, + "teacher_loss": 0.19491443037986755 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.714715301990509, + "learning_rate": 3.712300391561093e-06, + "loss": 0.2627, + "step": 22917, + "teacher_loss": 0.21250218152999878 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.19638285040855408, + "learning_rate": 3.710804728017872e-06, + "loss": 0.2273, + "step": 22918, + "teacher_loss": 0.23075248301029205 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.412413626909256, + "learning_rate": 3.7093093233020518e-06, + "loss": 0.2304, + "step": 22919, + "teacher_loss": 0.21021172404289246 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.31634876132011414, + "learning_rate": 3.7078141774479224e-06, + "loss": 0.1683, + "step": 22920, + "teacher_loss": 0.15188324451446533 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.45942509174346924, + "learning_rate": 3.7063192904897526e-06, + "loss": 0.1824, + "step": 22921, + "teacher_loss": 0.15157867968082428 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.23647035658359528, + "learning_rate": 3.7048246624618214e-06, + "loss": 0.1158, + "step": 22922, + "teacher_loss": 0.10233816504478455 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.4304838180541992, + "learning_rate": 3.703330293398402e-06, + "loss": 0.2657, + "step": 22923, + "teacher_loss": 0.24741026759147644 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.26016974449157715, + "learning_rate": 3.701836183333743e-06, + "loss": 0.2027, + "step": 22924, + "teacher_loss": 0.19629809260368347 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.13636326789855957, + "learning_rate": 3.700342332302113e-06, + "loss": 0.1889, + "step": 22925, + "teacher_loss": 0.1947277933359146 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.4319711923599243, + "learning_rate": 3.6988487403377504e-06, + "loss": 0.2679, + "step": 22926, + "teacher_loss": 0.24966493248939514 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.31379184126853943, + "learning_rate": 3.6973554074749077e-06, + "loss": 0.1636, + "step": 22927, + "teacher_loss": 0.14686912298202515 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.16165509819984436, + "learning_rate": 3.695862333747815e-06, + "loss": 0.1801, + "step": 22928, + "teacher_loss": 0.18217426538467407 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.2274320125579834, + "learning_rate": 3.6943695191907055e-06, + "loss": 0.1805, + "step": 22929, + "teacher_loss": 0.17528891563415527 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.270415723323822, + "learning_rate": 3.692876963837813e-06, + "loss": 0.2843, + "step": 22930, + "teacher_loss": 0.28579825162887573 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.330597460269928, + "learning_rate": 3.691384667723346e-06, + "loss": 0.1358, + "step": 22931, + "teacher_loss": 0.11410612612962723 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.2944859266281128, + "learning_rate": 3.689892630881524e-06, + "loss": 0.2132, + "step": 22932, + "teacher_loss": 0.20418840646743774 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.47896048426628113, + "learning_rate": 3.688400853346558e-06, + "loss": 0.2285, + "step": 22933, + "teacher_loss": 0.20068971812725067 + }, + { + "compression_loss": 0.0, + "epoch": 4.14, + "label_loss": 0.27607735991477966, + "learning_rate": 3.6869093351526424e-06, + "loss": 0.1737, + "step": 22934, + "teacher_loss": 0.1623132824897766 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.5528738498687744, + "learning_rate": 3.685418076333983e-06, + "loss": 0.2263, + "step": 22935, + "teacher_loss": 0.1899741142988205 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.36537253856658936, + "learning_rate": 3.683927076924759e-06, + "loss": 0.146, + "step": 22936, + "teacher_loss": 0.12161806970834732 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.4706469774246216, + "learning_rate": 3.6824363369591583e-06, + "loss": 0.2387, + "step": 22937, + "teacher_loss": 0.2129439413547516 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.43230509757995605, + "learning_rate": 3.680945856471367e-06, + "loss": 0.2205, + "step": 22938, + "teacher_loss": 0.19695062935352325 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.652461588382721, + "learning_rate": 3.6794556354955462e-06, + "loss": 0.2241, + "step": 22939, + "teacher_loss": 0.1764555275440216 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.34214848279953003, + "learning_rate": 3.677965674065866e-06, + "loss": 0.1961, + "step": 22940, + "teacher_loss": 0.17989175021648407 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.2174980789422989, + "learning_rate": 3.6764759722164935e-06, + "loss": 0.2382, + "step": 22941, + "teacher_loss": 0.24050256609916687 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.375851035118103, + "learning_rate": 3.674986529981571e-06, + "loss": 0.2101, + "step": 22942, + "teacher_loss": 0.19165174663066864 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 1.013223648071289, + "learning_rate": 3.6734973473952542e-06, + "loss": 0.2951, + "step": 22943, + "teacher_loss": 0.21527984738349915 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.27277395129203796, + "learning_rate": 3.6720084244916897e-06, + "loss": 0.2143, + "step": 22944, + "teacher_loss": 0.20775094628334045 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.3420126438140869, + "learning_rate": 3.670519761305008e-06, + "loss": 0.2025, + "step": 22945, + "teacher_loss": 0.18698303401470184 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.31995251774787903, + "learning_rate": 3.669031357869332e-06, + "loss": 0.2208, + "step": 22946, + "teacher_loss": 0.20978282392024994 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.1461927890777588, + "learning_rate": 3.667543214218804e-06, + "loss": 0.1272, + "step": 22947, + "teacher_loss": 0.1250927448272705 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.5924326181411743, + "learning_rate": 3.6660553303875344e-06, + "loss": 0.2495, + "step": 22948, + "teacher_loss": 0.21143761277198792 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.4884374737739563, + "learning_rate": 3.66456770640963e-06, + "loss": 0.2544, + "step": 22949, + "teacher_loss": 0.22844627499580383 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.39449718594551086, + "learning_rate": 3.663080342319202e-06, + "loss": 0.1787, + "step": 22950, + "teacher_loss": 0.15475699305534363 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.555443525314331, + "learning_rate": 3.661593238150357e-06, + "loss": 0.2134, + "step": 22951, + "teacher_loss": 0.17535611987113953 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.5994598865509033, + "learning_rate": 3.6601063939371802e-06, + "loss": 0.1891, + "step": 22952, + "teacher_loss": 0.14352145791053772 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.4192536473274231, + "learning_rate": 3.658619809713765e-06, + "loss": 0.2242, + "step": 22953, + "teacher_loss": 0.20252643525600433 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.5504093170166016, + "learning_rate": 3.6571334855141993e-06, + "loss": 0.2432, + "step": 22954, + "teacher_loss": 0.20905114710330963 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.36973509192466736, + "learning_rate": 3.65564742137255e-06, + "loss": 0.2217, + "step": 22955, + "teacher_loss": 0.20520079135894775 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.6982301473617554, + "learning_rate": 3.6541616173228937e-06, + "loss": 0.2205, + "step": 22956, + "teacher_loss": 0.16740819811820984 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.4887813627719879, + "learning_rate": 3.6526760733992982e-06, + "loss": 0.2915, + "step": 22957, + "teacher_loss": 0.26959437131881714 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.959095299243927, + "learning_rate": 3.651190789635821e-06, + "loss": 0.2965, + "step": 22958, + "teacher_loss": 0.22287209331989288 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.4964185357093811, + "learning_rate": 3.6497057660665034e-06, + "loss": 0.2361, + "step": 22959, + "teacher_loss": 0.2072097361087799 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.23728318512439728, + "learning_rate": 3.6482210027254115e-06, + "loss": 0.2387, + "step": 22960, + "teacher_loss": 0.2388104498386383 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.21218936145305634, + "learning_rate": 3.646736499646578e-06, + "loss": 0.1071, + "step": 22961, + "teacher_loss": 0.09545591473579407 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.16541728377342224, + "learning_rate": 3.645252256864034e-06, + "loss": 0.1442, + "step": 22962, + "teacher_loss": 0.14179465174674988 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.20861364901065826, + "learning_rate": 3.6437682744118105e-06, + "loss": 0.1679, + "step": 22963, + "teacher_loss": 0.16339004039764404 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.4187069833278656, + "learning_rate": 3.642284552323939e-06, + "loss": 0.1948, + "step": 22964, + "teacher_loss": 0.16992425918579102 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.49240052700042725, + "learning_rate": 3.6408010906344254e-06, + "loss": 0.1982, + "step": 22965, + "teacher_loss": 0.16550907492637634 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.7561980485916138, + "learning_rate": 3.639317889377287e-06, + "loss": 0.2299, + "step": 22966, + "teacher_loss": 0.1714029610157013 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.7846541404724121, + "learning_rate": 3.6378349485865308e-06, + "loss": 0.3005, + "step": 22967, + "teacher_loss": 0.24674327671527863 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.9935768246650696, + "learning_rate": 3.636352268296156e-06, + "loss": 0.3081, + "step": 22968, + "teacher_loss": 0.23196813464164734 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.7232280969619751, + "learning_rate": 3.634869848540143e-06, + "loss": 0.2625, + "step": 22969, + "teacher_loss": 0.2113342583179474 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.42201149463653564, + "learning_rate": 3.633387689352499e-06, + "loss": 0.2245, + "step": 22970, + "teacher_loss": 0.20256885886192322 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.19945359230041504, + "learning_rate": 3.6319057907671966e-06, + "loss": 0.1371, + "step": 22971, + "teacher_loss": 0.13019129633903503 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.49432605504989624, + "learning_rate": 3.630424152818203e-06, + "loss": 0.2798, + "step": 22972, + "teacher_loss": 0.25594794750213623 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.39221715927124023, + "learning_rate": 3.628942775539505e-06, + "loss": 0.1539, + "step": 22973, + "teacher_loss": 0.1274665743112564 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.6071451902389526, + "learning_rate": 3.627461658965055e-06, + "loss": 0.2304, + "step": 22974, + "teacher_loss": 0.18856005370616913 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.7591613531112671, + "learning_rate": 3.6259808031288096e-06, + "loss": 0.2146, + "step": 22975, + "teacher_loss": 0.1540401726961136 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.550673246383667, + "learning_rate": 3.6245002080647227e-06, + "loss": 0.2902, + "step": 22976, + "teacher_loss": 0.2613057494163513 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.6936201453208923, + "learning_rate": 3.6230198738067462e-06, + "loss": 0.374, + "step": 22977, + "teacher_loss": 0.3384852409362793 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.19716353714466095, + "learning_rate": 3.621539800388809e-06, + "loss": 0.2243, + "step": 22978, + "teacher_loss": 0.22733023762702942 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.9593712091445923, + "learning_rate": 3.6200599878448494e-06, + "loss": 0.2647, + "step": 22979, + "teacher_loss": 0.18754179775714874 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.7853481769561768, + "learning_rate": 3.6185804362087997e-06, + "loss": 0.2766, + "step": 22980, + "teacher_loss": 0.2201196551322937 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.39945822954177856, + "learning_rate": 3.617101145514572e-06, + "loss": 0.2096, + "step": 22981, + "teacher_loss": 0.18853068351745605 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.3145533800125122, + "learning_rate": 3.615622115796088e-06, + "loss": 0.2072, + "step": 22982, + "teacher_loss": 0.19526326656341553 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.14815108478069305, + "learning_rate": 3.614143347087262e-06, + "loss": 0.1279, + "step": 22983, + "teacher_loss": 0.12562045454978943 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.38142094016075134, + "learning_rate": 3.612664839421989e-06, + "loss": 0.2015, + "step": 22984, + "teacher_loss": 0.18151132762432098 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.29872769117355347, + "learning_rate": 3.6111865928341723e-06, + "loss": 0.1843, + "step": 22985, + "teacher_loss": 0.17154821753501892 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.9854205846786499, + "learning_rate": 3.6097086073576985e-06, + "loss": 0.2835, + "step": 22986, + "teacher_loss": 0.20552051067352295 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.5255351662635803, + "learning_rate": 3.6082308830264566e-06, + "loss": 0.2555, + "step": 22987, + "teacher_loss": 0.2255120575428009 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.5284769535064697, + "learning_rate": 3.6067534198743295e-06, + "loss": 0.2031, + "step": 22988, + "teacher_loss": 0.16697238385677338 + }, + { + "compression_loss": 0.0, + "epoch": 4.15, + "label_loss": 0.5412842035293579, + "learning_rate": 3.6052762179351837e-06, + "loss": 0.2328, + "step": 22989, + "teacher_loss": 0.19850695133209229 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.5556703805923462, + "learning_rate": 3.6037992772428955e-06, + "loss": 0.3705, + "step": 22990, + "teacher_loss": 0.3499550223350525 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.19103839993476868, + "learning_rate": 3.6023225978313196e-06, + "loss": 0.1859, + "step": 22991, + "teacher_loss": 0.18531179428100586 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.1765522062778473, + "learning_rate": 3.6008461797343134e-06, + "loss": 0.1803, + "step": 22992, + "teacher_loss": 0.1807209551334381 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.2183285355567932, + "learning_rate": 3.5993700229857317e-06, + "loss": 0.1657, + "step": 22993, + "teacher_loss": 0.1598687767982483 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.1906394511461258, + "learning_rate": 3.5978941276194092e-06, + "loss": 0.152, + "step": 22994, + "teacher_loss": 0.14773677289485931 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.1109514907002449, + "learning_rate": 3.596418493669191e-06, + "loss": 0.1249, + "step": 22995, + "teacher_loss": 0.12639667093753815 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.27491211891174316, + "learning_rate": 3.5949431211689104e-06, + "loss": 0.1636, + "step": 22996, + "teacher_loss": 0.15128490328788757 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.9285383224487305, + "learning_rate": 3.5934680101523865e-06, + "loss": 0.2512, + "step": 22997, + "teacher_loss": 0.17594145238399506 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.476473331451416, + "learning_rate": 3.591993160653447e-06, + "loss": 0.216, + "step": 22998, + "teacher_loss": 0.18701039254665375 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.20172373950481415, + "learning_rate": 3.590518572705896e-06, + "loss": 0.1433, + "step": 22999, + "teacher_loss": 0.13675940036773682 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.615238606929779, + "learning_rate": 3.589044246343547e-06, + "loss": 0.2452, + "step": 23000, + "teacher_loss": 0.20402935147285461 + }, + { + "epoch": 4.16, + "eval_exact_match": 80.52034058656575, + "eval_f1": 87.74671203053948, + "step": 23000 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.2686135172843933, + "learning_rate": 3.587570181600206e-06, + "loss": 0.1752, + "step": 23001, + "teacher_loss": 0.16479063034057617 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.6351625323295593, + "learning_rate": 3.5860963785096594e-06, + "loss": 0.2881, + "step": 23002, + "teacher_loss": 0.24951478838920593 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.5944644808769226, + "learning_rate": 3.584622837105702e-06, + "loss": 0.1979, + "step": 23003, + "teacher_loss": 0.1538289338350296 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.6565265655517578, + "learning_rate": 3.5831495574221237e-06, + "loss": 0.2787, + "step": 23004, + "teacher_loss": 0.23667272925376892 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.3300960063934326, + "learning_rate": 3.5816765394926916e-06, + "loss": 0.2137, + "step": 23005, + "teacher_loss": 0.20072150230407715 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.41751164197921753, + "learning_rate": 3.5802037833511823e-06, + "loss": 0.3082, + "step": 23006, + "teacher_loss": 0.2960692048072815 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.3950008153915405, + "learning_rate": 3.5787312890313678e-06, + "loss": 0.1687, + "step": 23007, + "teacher_loss": 0.14360690116882324 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.5454259514808655, + "learning_rate": 3.5772590565670006e-06, + "loss": 0.2142, + "step": 23008, + "teacher_loss": 0.17738831043243408 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.640907883644104, + "learning_rate": 3.5757870859918266e-06, + "loss": 0.2397, + "step": 23009, + "teacher_loss": 0.19507327675819397 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.5833874940872192, + "learning_rate": 3.574315377339613e-06, + "loss": 0.2279, + "step": 23010, + "teacher_loss": 0.1884416937828064 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.4555339217185974, + "learning_rate": 3.5728439306440926e-06, + "loss": 0.2772, + "step": 23011, + "teacher_loss": 0.25733864307403564 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.3392251431941986, + "learning_rate": 3.5713727459389944e-06, + "loss": 0.2683, + "step": 23012, + "teacher_loss": 0.26043885946273804 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.3541724681854248, + "learning_rate": 3.5699018232580556e-06, + "loss": 0.1978, + "step": 23013, + "teacher_loss": 0.18038895726203918 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.13383889198303223, + "learning_rate": 3.5684311626350017e-06, + "loss": 0.1369, + "step": 23014, + "teacher_loss": 0.1372849941253662 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.5277363657951355, + "learning_rate": 3.566960764103545e-06, + "loss": 0.3301, + "step": 23015, + "teacher_loss": 0.30815237760543823 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.44945448637008667, + "learning_rate": 3.5654906276974e-06, + "loss": 0.1915, + "step": 23016, + "teacher_loss": 0.1628149449825287 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.1567695587873459, + "learning_rate": 3.5640207534502754e-06, + "loss": 0.1412, + "step": 23017, + "teacher_loss": 0.13943041861057281 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.4491906762123108, + "learning_rate": 3.562551141395869e-06, + "loss": 0.2017, + "step": 23018, + "teacher_loss": 0.17420132458209991 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.17944678664207458, + "learning_rate": 3.5610817915678647e-06, + "loss": 0.1957, + "step": 23019, + "teacher_loss": 0.19749949872493744 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.2116556167602539, + "learning_rate": 3.559612703999967e-06, + "loss": 0.1822, + "step": 23020, + "teacher_loss": 0.178895965218544 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.3557807505130768, + "learning_rate": 3.5581438787258513e-06, + "loss": 0.1876, + "step": 23021, + "teacher_loss": 0.16890501976013184 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.7308270931243896, + "learning_rate": 3.556675315779182e-06, + "loss": 0.2261, + "step": 23022, + "teacher_loss": 0.16999118030071259 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.34969595074653625, + "learning_rate": 3.5552070151936487e-06, + "loss": 0.2091, + "step": 23023, + "teacher_loss": 0.19350463151931763 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.3997851014137268, + "learning_rate": 3.553738977002905e-06, + "loss": 0.2067, + "step": 23024, + "teacher_loss": 0.18525460362434387 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.34718644618988037, + "learning_rate": 3.552271201240605e-06, + "loss": 0.1876, + "step": 23025, + "teacher_loss": 0.16988863050937653 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.19922097027301788, + "learning_rate": 3.550803687940404e-06, + "loss": 0.1322, + "step": 23026, + "teacher_loss": 0.1247720718383789 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.22589808702468872, + "learning_rate": 3.5493364371359526e-06, + "loss": 0.1569, + "step": 23027, + "teacher_loss": 0.1492590606212616 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.26041728258132935, + "learning_rate": 3.54786944886088e-06, + "loss": 0.1933, + "step": 23028, + "teacher_loss": 0.1857980191707611 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.24987030029296875, + "learning_rate": 3.5464027231488265e-06, + "loss": 0.1806, + "step": 23029, + "teacher_loss": 0.1728602796792984 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.6884642839431763, + "learning_rate": 3.5449362600334247e-06, + "loss": 0.2773, + "step": 23030, + "teacher_loss": 0.23156386613845825 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.33983445167541504, + "learning_rate": 3.543470059548288e-06, + "loss": 0.166, + "step": 23031, + "teacher_loss": 0.14672136306762695 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.2822462022304535, + "learning_rate": 3.5420041217270276e-06, + "loss": 0.1874, + "step": 23032, + "teacher_loss": 0.17689284682273865 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.6106287240982056, + "learning_rate": 3.5405384466032676e-06, + "loss": 0.3592, + "step": 23033, + "teacher_loss": 0.3312756419181824 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.6505919694900513, + "learning_rate": 3.5390730342106024e-06, + "loss": 0.2125, + "step": 23034, + "teacher_loss": 0.16382786631584167 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.41064906120300293, + "learning_rate": 3.537607884582629e-06, + "loss": 0.1987, + "step": 23035, + "teacher_loss": 0.17516517639160156 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.33447691798210144, + "learning_rate": 3.536142997752939e-06, + "loss": 0.2958, + "step": 23036, + "teacher_loss": 0.29151907563209534 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.6125196218490601, + "learning_rate": 3.5346783737551252e-06, + "loss": 0.241, + "step": 23037, + "teacher_loss": 0.1997355818748474 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.5986076593399048, + "learning_rate": 3.5332140126227576e-06, + "loss": 0.2309, + "step": 23038, + "teacher_loss": 0.1900341659784317 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.33399444818496704, + "learning_rate": 3.5317499143894124e-06, + "loss": 0.1846, + "step": 23039, + "teacher_loss": 0.167948380112648 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.5682659149169922, + "learning_rate": 3.5302860790886627e-06, + "loss": 0.227, + "step": 23040, + "teacher_loss": 0.18908637762069702 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.475034236907959, + "learning_rate": 3.52882250675406e-06, + "loss": 0.2104, + "step": 23041, + "teacher_loss": 0.18101096153259277 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.3575937747955322, + "learning_rate": 3.5273591974191665e-06, + "loss": 0.3321, + "step": 23042, + "teacher_loss": 0.32922661304473877 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.17881804704666138, + "learning_rate": 3.525896151117533e-06, + "loss": 0.1648, + "step": 23043, + "teacher_loss": 0.16326507925987244 + }, + { + "compression_loss": 0.0, + "epoch": 4.16, + "label_loss": 0.2862161099910736, + "learning_rate": 3.5244333678826944e-06, + "loss": 0.1842, + "step": 23044, + "teacher_loss": 0.17287206649780273 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.3862951993942261, + "learning_rate": 3.522970847748196e-06, + "loss": 0.2788, + "step": 23045, + "teacher_loss": 0.26684844493865967 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.14299936592578888, + "learning_rate": 3.5215085907475685e-06, + "loss": 0.1527, + "step": 23046, + "teacher_loss": 0.153724804520607 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.6448768377304077, + "learning_rate": 3.5200465969143308e-06, + "loss": 0.2262, + "step": 23047, + "teacher_loss": 0.17962735891342163 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.21429994702339172, + "learning_rate": 3.518584866282009e-06, + "loss": 0.1781, + "step": 23048, + "teacher_loss": 0.174087792634964 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.6824623942375183, + "learning_rate": 3.5171233988841103e-06, + "loss": 0.2198, + "step": 23049, + "teacher_loss": 0.16844826936721802 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.2713364362716675, + "learning_rate": 3.515662194754145e-06, + "loss": 0.1622, + "step": 23050, + "teacher_loss": 0.1501184105873108 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.29406094551086426, + "learning_rate": 3.5142012539256172e-06, + "loss": 0.2143, + "step": 23051, + "teacher_loss": 0.20543086528778076 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 1.391892433166504, + "learning_rate": 3.5127405764320136e-06, + "loss": 0.4066, + "step": 23052, + "teacher_loss": 0.2971389591693878 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.21659645438194275, + "learning_rate": 3.5112801623068334e-06, + "loss": 0.1823, + "step": 23053, + "teacher_loss": 0.17845438420772552 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.45537999272346497, + "learning_rate": 3.5098200115835486e-06, + "loss": 0.2846, + "step": 23054, + "teacher_loss": 0.26559895277023315 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.49798262119293213, + "learning_rate": 3.5083601242956424e-06, + "loss": 0.2561, + "step": 23055, + "teacher_loss": 0.229237362742424 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.2531549334526062, + "learning_rate": 3.50690050047659e-06, + "loss": 0.174, + "step": 23056, + "teacher_loss": 0.16520947217941284 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.06606639921665192, + "learning_rate": 3.5054411401598454e-06, + "loss": 0.1605, + "step": 23057, + "teacher_loss": 0.17101889848709106 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.28669625520706177, + "learning_rate": 3.503982043378877e-06, + "loss": 0.1367, + "step": 23058, + "teacher_loss": 0.12002532184123993 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.5697584748268127, + "learning_rate": 3.5025232101671307e-06, + "loss": 0.2316, + "step": 23059, + "teacher_loss": 0.19402465224266052 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.3674541711807251, + "learning_rate": 3.5010646405580553e-06, + "loss": 0.2789, + "step": 23060, + "teacher_loss": 0.2690128684043884 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.37795090675354004, + "learning_rate": 3.4996063345850963e-06, + "loss": 0.2091, + "step": 23061, + "teacher_loss": 0.19036495685577393 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.6862949728965759, + "learning_rate": 3.498148292281679e-06, + "loss": 0.3393, + "step": 23062, + "teacher_loss": 0.3007887005805969 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.4280327260494232, + "learning_rate": 3.4966905136812386e-06, + "loss": 0.212, + "step": 23063, + "teacher_loss": 0.18798241019248962 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.38071465492248535, + "learning_rate": 3.4952329988172e-06, + "loss": 0.191, + "step": 23064, + "teacher_loss": 0.16994601488113403 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.5661048889160156, + "learning_rate": 3.4937757477229703e-06, + "loss": 0.1905, + "step": 23065, + "teacher_loss": 0.1487686038017273 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.8560075759887695, + "learning_rate": 3.492318760431965e-06, + "loss": 0.3051, + "step": 23066, + "teacher_loss": 0.24387824535369873 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.2786880135536194, + "learning_rate": 3.490862036977594e-06, + "loss": 0.1809, + "step": 23067, + "teacher_loss": 0.16999632120132446 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.36139294505119324, + "learning_rate": 3.4894055773932505e-06, + "loss": 0.1684, + "step": 23068, + "teacher_loss": 0.146906316280365 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.19137410819530487, + "learning_rate": 3.4879493817123174e-06, + "loss": 0.1659, + "step": 23069, + "teacher_loss": 0.16308638453483582 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.378268837928772, + "learning_rate": 3.4864934499681987e-06, + "loss": 0.2321, + "step": 23070, + "teacher_loss": 0.21585942804813385 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.4817790985107422, + "learning_rate": 3.485037782194266e-06, + "loss": 0.1819, + "step": 23071, + "teacher_loss": 0.14856937527656555 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.6761884093284607, + "learning_rate": 3.4835823784238885e-06, + "loss": 0.2285, + "step": 23072, + "teacher_loss": 0.17873480916023254 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.6765734553337097, + "learning_rate": 3.4821272386904403e-06, + "loss": 0.2071, + "step": 23073, + "teacher_loss": 0.15498651564121246 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.4827902913093567, + "learning_rate": 3.4806723630272857e-06, + "loss": 0.2411, + "step": 23074, + "teacher_loss": 0.21424569189548492 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.4492906928062439, + "learning_rate": 3.479217751467772e-06, + "loss": 0.2292, + "step": 23075, + "teacher_loss": 0.20478612184524536 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.5636456608772278, + "learning_rate": 3.4777634040452555e-06, + "loss": 0.2305, + "step": 23076, + "teacher_loss": 0.19347113370895386 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.2304190993309021, + "learning_rate": 3.476309320793083e-06, + "loss": 0.1731, + "step": 23077, + "teacher_loss": 0.16676658391952515 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.2625107169151306, + "learning_rate": 3.474855501744583e-06, + "loss": 0.162, + "step": 23078, + "teacher_loss": 0.15083765983581543 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.7221174836158752, + "learning_rate": 3.473401946933091e-06, + "loss": 0.3914, + "step": 23079, + "teacher_loss": 0.35461288690567017 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.3117017447948456, + "learning_rate": 3.4719486563919404e-06, + "loss": 0.1907, + "step": 23080, + "teacher_loss": 0.17723682522773743 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.7119735479354858, + "learning_rate": 3.470495630154444e-06, + "loss": 0.2388, + "step": 23081, + "teacher_loss": 0.18624058365821838 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.3469555675983429, + "learning_rate": 3.469042868253906e-06, + "loss": 0.159, + "step": 23082, + "teacher_loss": 0.13811668753623962 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.30276572704315186, + "learning_rate": 3.467590370723652e-06, + "loss": 0.1792, + "step": 23083, + "teacher_loss": 0.165448397397995 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.5348036289215088, + "learning_rate": 3.4661381375969743e-06, + "loss": 0.2451, + "step": 23084, + "teacher_loss": 0.2129574567079544 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.36367231607437134, + "learning_rate": 3.4646861689071654e-06, + "loss": 0.1796, + "step": 23085, + "teacher_loss": 0.15916860103607178 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.3639659881591797, + "learning_rate": 3.4632344646875187e-06, + "loss": 0.1554, + "step": 23086, + "teacher_loss": 0.13227801024913788 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.3779885768890381, + "learning_rate": 3.4617830249713197e-06, + "loss": 0.2076, + "step": 23087, + "teacher_loss": 0.18865551054477692 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.177667498588562, + "learning_rate": 3.460331849791838e-06, + "loss": 0.1902, + "step": 23088, + "teacher_loss": 0.19158610701560974 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.2031850814819336, + "learning_rate": 3.4588809391823506e-06, + "loss": 0.2337, + "step": 23089, + "teacher_loss": 0.23713871836662292 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.5875095725059509, + "learning_rate": 3.457430293176124e-06, + "loss": 0.2912, + "step": 23090, + "teacher_loss": 0.25826495885849 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.881023645401001, + "learning_rate": 3.4559799118064123e-06, + "loss": 0.304, + "step": 23091, + "teacher_loss": 0.23992681503295898 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.36596783995628357, + "learning_rate": 3.454529795106468e-06, + "loss": 0.204, + "step": 23092, + "teacher_loss": 0.18600502610206604 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.5663193464279175, + "learning_rate": 3.4530799431095466e-06, + "loss": 0.2273, + "step": 23093, + "teacher_loss": 0.1896408349275589 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.5457674264907837, + "learning_rate": 3.4516303558488814e-06, + "loss": 0.201, + "step": 23094, + "teacher_loss": 0.16270089149475098 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.2736748158931732, + "learning_rate": 3.4501810333576996e-06, + "loss": 0.1995, + "step": 23095, + "teacher_loss": 0.19127756357192993 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.2578657865524292, + "learning_rate": 3.4487319756692485e-06, + "loss": 0.1627, + "step": 23096, + "teacher_loss": 0.15217159688472748 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.29563191533088684, + "learning_rate": 3.4472831828167394e-06, + "loss": 0.1679, + "step": 23097, + "teacher_loss": 0.15369385480880737 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.2139589488506317, + "learning_rate": 3.445834654833387e-06, + "loss": 0.1381, + "step": 23098, + "teacher_loss": 0.1296936273574829 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.5471444129943848, + "learning_rate": 3.4443863917524027e-06, + "loss": 0.2578, + "step": 23099, + "teacher_loss": 0.22570329904556274 + }, + { + "compression_loss": 0.0, + "epoch": 4.17, + "label_loss": 0.47578898072242737, + "learning_rate": 3.4429383936069986e-06, + "loss": 0.3296, + "step": 23100, + "teacher_loss": 0.31338417530059814 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.4150957465171814, + "learning_rate": 3.441490660430363e-06, + "loss": 0.2109, + "step": 23101, + "teacher_loss": 0.18826086819171906 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.7113062143325806, + "learning_rate": 3.440043192255693e-06, + "loss": 0.2363, + "step": 23102, + "teacher_loss": 0.18353629112243652 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.20287221670150757, + "learning_rate": 3.4385959891161767e-06, + "loss": 0.1397, + "step": 23103, + "teacher_loss": 0.13266320526599884 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.3885650336742401, + "learning_rate": 3.437149051044989e-06, + "loss": 0.1565, + "step": 23104, + "teacher_loss": 0.1306624412536621 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.2524203062057495, + "learning_rate": 3.4357023780753062e-06, + "loss": 0.2213, + "step": 23105, + "teacher_loss": 0.21788470447063446 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.5326588749885559, + "learning_rate": 3.4342559702402986e-06, + "loss": 0.2417, + "step": 23106, + "teacher_loss": 0.2094140350818634 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.38849279284477234, + "learning_rate": 3.432809827573122e-06, + "loss": 0.2148, + "step": 23107, + "teacher_loss": 0.19549046456813812 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.8679608106613159, + "learning_rate": 3.4313639501069423e-06, + "loss": 0.3087, + "step": 23108, + "teacher_loss": 0.24660570919513702 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.6019550561904907, + "learning_rate": 3.4299183378748964e-06, + "loss": 0.3181, + "step": 23109, + "teacher_loss": 0.2865433692932129 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.482722669839859, + "learning_rate": 3.4284729909101343e-06, + "loss": 0.2053, + "step": 23110, + "teacher_loss": 0.17447781562805176 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.16539128124713898, + "learning_rate": 3.4270279092457983e-06, + "loss": 0.1799, + "step": 23111, + "teacher_loss": 0.18150529265403748 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.4296663999557495, + "learning_rate": 3.4255830929150094e-06, + "loss": 0.2324, + "step": 23112, + "teacher_loss": 0.21045240759849548 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.2847111225128174, + "learning_rate": 3.424138541950899e-06, + "loss": 0.199, + "step": 23113, + "teacher_loss": 0.18947505950927734 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.425641804933548, + "learning_rate": 3.4226942563865894e-06, + "loss": 0.2387, + "step": 23114, + "teacher_loss": 0.21792790293693542 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.20858192443847656, + "learning_rate": 3.4212502362551865e-06, + "loss": 0.1683, + "step": 23115, + "teacher_loss": 0.16383719444274902 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.41641461849212646, + "learning_rate": 3.419806481589805e-06, + "loss": 0.1497, + "step": 23116, + "teacher_loss": 0.12008485943078995 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.3464733958244324, + "learning_rate": 3.418362992423536e-06, + "loss": 0.1677, + "step": 23117, + "teacher_loss": 0.1478014588356018 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.26356565952301025, + "learning_rate": 3.4169197687894817e-06, + "loss": 0.1459, + "step": 23118, + "teacher_loss": 0.13278725743293762 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.4236745834350586, + "learning_rate": 3.415476810720732e-06, + "loss": 0.1926, + "step": 23119, + "teacher_loss": 0.16697412729263306 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.49042966961860657, + "learning_rate": 3.4140341182503636e-06, + "loss": 0.2033, + "step": 23120, + "teacher_loss": 0.17136478424072266 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.793599009513855, + "learning_rate": 3.412591691411461e-06, + "loss": 0.2855, + "step": 23121, + "teacher_loss": 0.22906804084777832 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.48320770263671875, + "learning_rate": 3.4111495302370847e-06, + "loss": 0.2039, + "step": 23122, + "teacher_loss": 0.17289745807647705 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.4167281985282898, + "learning_rate": 3.4097076347603045e-06, + "loss": 0.2219, + "step": 23123, + "teacher_loss": 0.20024336874485016 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.39261752367019653, + "learning_rate": 3.4082660050141834e-06, + "loss": 0.3335, + "step": 23124, + "teacher_loss": 0.3268764019012451 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.5964981317520142, + "learning_rate": 3.406824641031765e-06, + "loss": 0.2728, + "step": 23125, + "teacher_loss": 0.2368173599243164 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.7356168031692505, + "learning_rate": 3.4053835428460994e-06, + "loss": 0.2088, + "step": 23126, + "teacher_loss": 0.1502305269241333 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.2757972478866577, + "learning_rate": 3.4039427104902303e-06, + "loss": 0.1441, + "step": 23127, + "teacher_loss": 0.12949460744857788 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.2848433554172516, + "learning_rate": 3.4025021439971847e-06, + "loss": 0.2147, + "step": 23128, + "teacher_loss": 0.20693716406822205 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.09261728078126907, + "learning_rate": 3.4010618433999928e-06, + "loss": 0.1254, + "step": 23129, + "teacher_loss": 0.12908291816711426 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.4172527492046356, + "learning_rate": 3.399621808731683e-06, + "loss": 0.1994, + "step": 23130, + "teacher_loss": 0.17517180740833282 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.3694298565387726, + "learning_rate": 3.3981820400252644e-06, + "loss": 0.1904, + "step": 23131, + "teacher_loss": 0.17045944929122925 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.6036772727966309, + "learning_rate": 3.3967425373137402e-06, + "loss": 0.198, + "step": 23132, + "teacher_loss": 0.1529236137866974 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.3890681266784668, + "learning_rate": 3.3953033006301286e-06, + "loss": 0.2426, + "step": 23133, + "teacher_loss": 0.226350799202919 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.6746411323547363, + "learning_rate": 3.393864330007419e-06, + "loss": 0.2963, + "step": 23134, + "teacher_loss": 0.2542327344417572 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.5366601347923279, + "learning_rate": 3.392425625478601e-06, + "loss": 0.251, + "step": 23135, + "teacher_loss": 0.21928879618644714 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.7229174375534058, + "learning_rate": 3.3909871870766607e-06, + "loss": 0.2026, + "step": 23136, + "teacher_loss": 0.14477306604385376 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.16683848202228546, + "learning_rate": 3.3895490148345824e-06, + "loss": 0.2173, + "step": 23137, + "teacher_loss": 0.2228713035583496 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.5871052742004395, + "learning_rate": 3.3881111087853324e-06, + "loss": 0.2727, + "step": 23138, + "teacher_loss": 0.2377534806728363 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.42284300923347473, + "learning_rate": 3.38667346896188e-06, + "loss": 0.2072, + "step": 23139, + "teacher_loss": 0.18329398334026337 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.4151493310928345, + "learning_rate": 3.385236095397191e-06, + "loss": 0.1943, + "step": 23140, + "teacher_loss": 0.16978409886360168 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.41488131880760193, + "learning_rate": 3.3837989881242142e-06, + "loss": 0.1417, + "step": 23141, + "teacher_loss": 0.11133973300457001 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.565300703048706, + "learning_rate": 3.3823621471758915e-06, + "loss": 0.2247, + "step": 23142, + "teacher_loss": 0.18681365251541138 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.9429868459701538, + "learning_rate": 3.380925572585183e-06, + "loss": 0.2981, + "step": 23143, + "teacher_loss": 0.22640720009803772 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.25328555703163147, + "learning_rate": 3.3794892643850138e-06, + "loss": 0.1962, + "step": 23144, + "teacher_loss": 0.1898488700389862 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.269192099571228, + "learning_rate": 3.3780532226083084e-06, + "loss": 0.2468, + "step": 23145, + "teacher_loss": 0.24431046843528748 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.40381455421447754, + "learning_rate": 3.3766174472880046e-06, + "loss": 0.2304, + "step": 23146, + "teacher_loss": 0.2111397087574005 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.4522622525691986, + "learning_rate": 3.3751819384570164e-06, + "loss": 0.1818, + "step": 23147, + "teacher_loss": 0.1517300307750702 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.3893733620643616, + "learning_rate": 3.3737466961482477e-06, + "loss": 0.3172, + "step": 23148, + "teacher_loss": 0.30912572145462036 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.18304932117462158, + "learning_rate": 3.3723117203946102e-06, + "loss": 0.188, + "step": 23149, + "teacher_loss": 0.18857598304748535 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.10586945712566376, + "learning_rate": 3.3708770112290076e-06, + "loss": 0.173, + "step": 23150, + "teacher_loss": 0.18046292662620544 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.16860730946063995, + "learning_rate": 3.3694425686843257e-06, + "loss": 0.192, + "step": 23151, + "teacher_loss": 0.19455364346504211 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.33729445934295654, + "learning_rate": 3.368008392793454e-06, + "loss": 0.2079, + "step": 23152, + "teacher_loss": 0.19349642097949982 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.6588553190231323, + "learning_rate": 3.3665744835892804e-06, + "loss": 0.2214, + "step": 23153, + "teacher_loss": 0.17283034324645996 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.23850327730178833, + "learning_rate": 3.3651408411046713e-06, + "loss": 0.1432, + "step": 23154, + "teacher_loss": 0.13265465199947357 + }, + { + "compression_loss": 0.0, + "epoch": 4.18, + "label_loss": 0.4854457378387451, + "learning_rate": 3.363707465372499e-06, + "loss": 0.2116, + "step": 23155, + "teacher_loss": 0.18115684390068054 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.2723848223686218, + "learning_rate": 3.362274356425631e-06, + "loss": 0.1509, + "step": 23156, + "teacher_loss": 0.13745540380477905 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 1.0670541524887085, + "learning_rate": 3.3608415142969216e-06, + "loss": 0.2708, + "step": 23157, + "teacher_loss": 0.1823194921016693 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.19311510026454926, + "learning_rate": 3.359408939019214e-06, + "loss": 0.1402, + "step": 23158, + "teacher_loss": 0.13434293866157532 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.3140757083892822, + "learning_rate": 3.357976630625361e-06, + "loss": 0.1709, + "step": 23159, + "teacher_loss": 0.15504290163516998 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.30423688888549805, + "learning_rate": 3.3565445891482022e-06, + "loss": 0.2463, + "step": 23160, + "teacher_loss": 0.23991422355175018 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.47882339358329773, + "learning_rate": 3.355112814620564e-06, + "loss": 0.2395, + "step": 23161, + "teacher_loss": 0.21287930011749268 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.3179911673069, + "learning_rate": 3.353681307075275e-06, + "loss": 0.1748, + "step": 23162, + "teacher_loss": 0.15884165465831757 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.3309684097766876, + "learning_rate": 3.352250066545162e-06, + "loss": 0.2097, + "step": 23163, + "teacher_loss": 0.1961754560470581 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.2728070020675659, + "learning_rate": 3.3508190930630267e-06, + "loss": 0.1953, + "step": 23164, + "teacher_loss": 0.18664434552192688 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.2471676468849182, + "learning_rate": 3.3493883866616844e-06, + "loss": 0.2007, + "step": 23165, + "teacher_loss": 0.19557629525661469 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.3898439407348633, + "learning_rate": 3.3479579473739396e-06, + "loss": 0.2091, + "step": 23166, + "teacher_loss": 0.18903875350952148 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.526746392250061, + "learning_rate": 3.346527775232582e-06, + "loss": 0.2977, + "step": 23167, + "teacher_loss": 0.2722380757331848 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.30383870005607605, + "learning_rate": 3.3450978702704026e-06, + "loss": 0.1954, + "step": 23168, + "teacher_loss": 0.18331778049468994 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.7254639863967896, + "learning_rate": 3.3436682325201894e-06, + "loss": 0.2012, + "step": 23169, + "teacher_loss": 0.14290496706962585 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.28834739327430725, + "learning_rate": 3.3422388620147126e-06, + "loss": 0.206, + "step": 23170, + "teacher_loss": 0.1968344897031784 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.3278650641441345, + "learning_rate": 3.3408097587867508e-06, + "loss": 0.1576, + "step": 23171, + "teacher_loss": 0.13863252103328705 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.5140213966369629, + "learning_rate": 3.3393809228690615e-06, + "loss": 0.2292, + "step": 23172, + "teacher_loss": 0.19749847054481506 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.3850061297416687, + "learning_rate": 3.3379523542944064e-06, + "loss": 0.2267, + "step": 23173, + "teacher_loss": 0.20907780528068542 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.4654046595096588, + "learning_rate": 3.336524053095544e-06, + "loss": 0.1822, + "step": 23174, + "teacher_loss": 0.15067876875400543 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.6908611059188843, + "learning_rate": 3.3350960193052128e-06, + "loss": 0.2103, + "step": 23175, + "teacher_loss": 0.15692821145057678 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.36671182513237, + "learning_rate": 3.33366825295616e-06, + "loss": 0.1861, + "step": 23176, + "teacher_loss": 0.16601984202861786 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.12948335707187653, + "learning_rate": 3.332240754081112e-06, + "loss": 0.1298, + "step": 23177, + "teacher_loss": 0.12986937165260315 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.47745347023010254, + "learning_rate": 3.330813522712803e-06, + "loss": 0.1947, + "step": 23178, + "teacher_loss": 0.1632394790649414 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.4009149372577667, + "learning_rate": 3.3293865588839572e-06, + "loss": 0.1994, + "step": 23179, + "teacher_loss": 0.17702603340148926 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.4606701135635376, + "learning_rate": 3.327959862627283e-06, + "loss": 0.2311, + "step": 23180, + "teacher_loss": 0.2056129425764084 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.39936357736587524, + "learning_rate": 3.326533433975498e-06, + "loss": 0.214, + "step": 23181, + "teacher_loss": 0.19342336058616638 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.4957432150840759, + "learning_rate": 3.325107272961301e-06, + "loss": 0.285, + "step": 23182, + "teacher_loss": 0.2616129517555237 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.3253798186779022, + "learning_rate": 3.3236813796173874e-06, + "loss": 0.1679, + "step": 23183, + "teacher_loss": 0.1504509598016739 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.41809338331222534, + "learning_rate": 3.322255753976459e-06, + "loss": 0.2774, + "step": 23184, + "teacher_loss": 0.26179519295692444 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.2753937244415283, + "learning_rate": 3.3208303960711895e-06, + "loss": 0.2219, + "step": 23185, + "teacher_loss": 0.21598400175571442 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.4757876396179199, + "learning_rate": 3.319405305934264e-06, + "loss": 0.22, + "step": 23186, + "teacher_loss": 0.19157151877880096 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.28362560272216797, + "learning_rate": 3.317980483598358e-06, + "loss": 0.1972, + "step": 23187, + "teacher_loss": 0.18758606910705566 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.4339344799518585, + "learning_rate": 3.3165559290961305e-06, + "loss": 0.2347, + "step": 23188, + "teacher_loss": 0.21256569027900696 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.22173359990119934, + "learning_rate": 3.3151316424602473e-06, + "loss": 0.3242, + "step": 23189, + "teacher_loss": 0.33562013506889343 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.3903476595878601, + "learning_rate": 3.313707623723367e-06, + "loss": 0.1981, + "step": 23190, + "teacher_loss": 0.176687091588974 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.3558241128921509, + "learning_rate": 3.3122838729181337e-06, + "loss": 0.2486, + "step": 23191, + "teacher_loss": 0.23665130138397217 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.17406710982322693, + "learning_rate": 3.3108603900771796e-06, + "loss": 0.1564, + "step": 23192, + "teacher_loss": 0.15439894795417786 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.23174987733364105, + "learning_rate": 3.3094371752331614e-06, + "loss": 0.1345, + "step": 23193, + "teacher_loss": 0.123715341091156 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.2956083416938782, + "learning_rate": 3.308014228418697e-06, + "loss": 0.2285, + "step": 23194, + "teacher_loss": 0.22104580700397491 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.5295372605323792, + "learning_rate": 3.3065915496664036e-06, + "loss": 0.2556, + "step": 23195, + "teacher_loss": 0.22513464093208313 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.3629109263420105, + "learning_rate": 3.305169139008916e-06, + "loss": 0.1687, + "step": 23196, + "teacher_loss": 0.1470872014760971 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.5343495011329651, + "learning_rate": 3.3037469964788375e-06, + "loss": 0.2891, + "step": 23197, + "teacher_loss": 0.26180732250213623 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.5343927145004272, + "learning_rate": 3.3023251221087696e-06, + "loss": 0.1842, + "step": 23198, + "teacher_loss": 0.14534254372119904 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.13145811855793, + "learning_rate": 3.3009035159313133e-06, + "loss": 0.1611, + "step": 23199, + "teacher_loss": 0.16436973214149475 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.45168089866638184, + "learning_rate": 3.2994821779790692e-06, + "loss": 0.2444, + "step": 23200, + "teacher_loss": 0.22133547067642212 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.24440333247184753, + "learning_rate": 3.2980611082846155e-06, + "loss": 0.2401, + "step": 23201, + "teacher_loss": 0.23964785039424896 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.34215888381004333, + "learning_rate": 3.2966403068805354e-06, + "loss": 0.1438, + "step": 23202, + "teacher_loss": 0.12181514501571655 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.7256677746772766, + "learning_rate": 3.295219773799408e-06, + "loss": 0.2474, + "step": 23203, + "teacher_loss": 0.19421373307704926 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.4638415277004242, + "learning_rate": 3.2937995090737994e-06, + "loss": 0.1843, + "step": 23204, + "teacher_loss": 0.1532231718301773 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.4606209695339203, + "learning_rate": 3.292379512736263e-06, + "loss": 0.2317, + "step": 23205, + "teacher_loss": 0.20623856782913208 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.7658290863037109, + "learning_rate": 3.2909597848193703e-06, + "loss": 0.2832, + "step": 23206, + "teacher_loss": 0.2295701801776886 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.43750715255737305, + "learning_rate": 3.2895403253556637e-06, + "loss": 0.2398, + "step": 23207, + "teacher_loss": 0.21785637736320496 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.30773842334747314, + "learning_rate": 3.2881211343776845e-06, + "loss": 0.2379, + "step": 23208, + "teacher_loss": 0.23010963201522827 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.4041637182235718, + "learning_rate": 3.286702211917975e-06, + "loss": 0.2011, + "step": 23209, + "teacher_loss": 0.17848192155361176 + }, + { + "compression_loss": 0.0, + "epoch": 4.19, + "label_loss": 0.36244457960128784, + "learning_rate": 3.285283558009068e-06, + "loss": 0.2011, + "step": 23210, + "teacher_loss": 0.18321293592453003 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.46524864435195923, + "learning_rate": 3.2838651726834818e-06, + "loss": 0.3107, + "step": 23211, + "teacher_loss": 0.29352104663848877 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.41521868109703064, + "learning_rate": 3.2824470559737417e-06, + "loss": 0.1819, + "step": 23212, + "teacher_loss": 0.15600429475307465 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.48029810190200806, + "learning_rate": 3.281029207912364e-06, + "loss": 0.2252, + "step": 23213, + "teacher_loss": 0.19683459401130676 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.41815704107284546, + "learning_rate": 3.279611628531845e-06, + "loss": 0.1873, + "step": 23214, + "teacher_loss": 0.16170324385166168 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.22239267826080322, + "learning_rate": 3.278194317864693e-06, + "loss": 0.1346, + "step": 23215, + "teacher_loss": 0.12484588474035263 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.3809296190738678, + "learning_rate": 3.276777275943406e-06, + "loss": 0.2397, + "step": 23216, + "teacher_loss": 0.22396604716777802 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.42656710743904114, + "learning_rate": 3.275360502800464e-06, + "loss": 0.2285, + "step": 23217, + "teacher_loss": 0.20649024844169617 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.34348562359809875, + "learning_rate": 3.273943998468354e-06, + "loss": 0.2613, + "step": 23218, + "teacher_loss": 0.25216394662857056 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.1662067323923111, + "learning_rate": 3.272527762979553e-06, + "loss": 0.1574, + "step": 23219, + "teacher_loss": 0.15641099214553833 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.4828524589538574, + "learning_rate": 3.2711117963665322e-06, + "loss": 0.1973, + "step": 23220, + "teacher_loss": 0.16558346152305603 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.4261665940284729, + "learning_rate": 3.2696960986617486e-06, + "loss": 0.181, + "step": 23221, + "teacher_loss": 0.15380455553531647 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.653442919254303, + "learning_rate": 3.2682806698976633e-06, + "loss": 0.2423, + "step": 23222, + "teacher_loss": 0.19660450518131256 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.36236006021499634, + "learning_rate": 3.266865510106733e-06, + "loss": 0.168, + "step": 23223, + "teacher_loss": 0.14637230336666107 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.9944034218788147, + "learning_rate": 3.265450619321394e-06, + "loss": 0.298, + "step": 23224, + "teacher_loss": 0.22064675390720367 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.2331797480583191, + "learning_rate": 3.264035997574092e-06, + "loss": 0.2021, + "step": 23225, + "teacher_loss": 0.19866512715816498 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.16841256618499756, + "learning_rate": 3.262621644897261e-06, + "loss": 0.1946, + "step": 23226, + "teacher_loss": 0.19747957587242126 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.8005828857421875, + "learning_rate": 3.261207561323321e-06, + "loss": 0.3342, + "step": 23227, + "teacher_loss": 0.28238290548324585 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.27268239855766296, + "learning_rate": 3.259793746884697e-06, + "loss": 0.193, + "step": 23228, + "teacher_loss": 0.1841348111629486 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.27460533380508423, + "learning_rate": 3.258380201613808e-06, + "loss": 0.1354, + "step": 23229, + "teacher_loss": 0.11998498439788818 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.4222524166107178, + "learning_rate": 3.2569669255430545e-06, + "loss": 0.24, + "step": 23230, + "teacher_loss": 0.21979591250419617 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.23753851652145386, + "learning_rate": 3.2555539187048455e-06, + "loss": 0.1285, + "step": 23231, + "teacher_loss": 0.11642719060182571 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.5014715790748596, + "learning_rate": 3.2541411811315685e-06, + "loss": 0.248, + "step": 23232, + "teacher_loss": 0.21984541416168213 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.3061332106590271, + "learning_rate": 3.2527287128556195e-06, + "loss": 0.1705, + "step": 23233, + "teacher_loss": 0.15537869930267334 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.5836219787597656, + "learning_rate": 3.2513165139093837e-06, + "loss": 0.2233, + "step": 23234, + "teacher_loss": 0.1832505315542221 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.4008907973766327, + "learning_rate": 3.2499045843252324e-06, + "loss": 0.1946, + "step": 23235, + "teacher_loss": 0.1716565489768982 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.2025545835494995, + "learning_rate": 3.248492924135541e-06, + "loss": 0.1781, + "step": 23236, + "teacher_loss": 0.17538204789161682 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.07936612516641617, + "learning_rate": 3.247081533372677e-06, + "loss": 0.1837, + "step": 23237, + "teacher_loss": 0.1952945590019226 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.4083752930164337, + "learning_rate": 3.2456704120689933e-06, + "loss": 0.2179, + "step": 23238, + "teacher_loss": 0.1967182457447052 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.23880809545516968, + "learning_rate": 3.2442595602568486e-06, + "loss": 0.2351, + "step": 23239, + "teacher_loss": 0.23464131355285645 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.1807381957769394, + "learning_rate": 3.2428489779685837e-06, + "loss": 0.141, + "step": 23240, + "teacher_loss": 0.1366264373064041 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.6207588315010071, + "learning_rate": 3.241438665236541e-06, + "loss": 0.5384, + "step": 23241, + "teacher_loss": 0.5292384624481201 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.539265513420105, + "learning_rate": 3.240028622093062e-06, + "loss": 0.2652, + "step": 23242, + "teacher_loss": 0.234774649143219 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.22803576290607452, + "learning_rate": 3.2386188485704614e-06, + "loss": 0.1169, + "step": 23243, + "teacher_loss": 0.10460447520017624 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 1.2326433658599854, + "learning_rate": 3.237209344701074e-06, + "loss": 0.3174, + "step": 23244, + "teacher_loss": 0.21570897102355957 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.47658029198646545, + "learning_rate": 3.2358001105172057e-06, + "loss": 0.2152, + "step": 23245, + "teacher_loss": 0.1861284077167511 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.3026275038719177, + "learning_rate": 3.234391146051169e-06, + "loss": 0.1742, + "step": 23246, + "teacher_loss": 0.1599264144897461 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.4877398610115051, + "learning_rate": 3.2329824513352723e-06, + "loss": 0.277, + "step": 23247, + "teacher_loss": 0.25361716747283936 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.5098623633384705, + "learning_rate": 3.2315740264018054e-06, + "loss": 0.2111, + "step": 23248, + "teacher_loss": 0.17791599035263062 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.17468449473381042, + "learning_rate": 3.2301658712830627e-06, + "loss": 0.1834, + "step": 23249, + "teacher_loss": 0.1843147575855255 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.16486650705337524, + "learning_rate": 3.2287579860113314e-06, + "loss": 0.1183, + "step": 23250, + "teacher_loss": 0.113125279545784 + }, + { + "epoch": 4.2, + "eval_exact_match": 80.32166508987702, + "eval_f1": 87.6927754004118, + "step": 23250 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.462316632270813, + "learning_rate": 3.2273503706188854e-06, + "loss": 0.2153, + "step": 23251, + "teacher_loss": 0.1878376007080078 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.4400089979171753, + "learning_rate": 3.2259430251379978e-06, + "loss": 0.2136, + "step": 23252, + "teacher_loss": 0.1884135603904724 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.16478344798088074, + "learning_rate": 3.2245359496009407e-06, + "loss": 0.1776, + "step": 23253, + "teacher_loss": 0.17907708883285522 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.22347962856292725, + "learning_rate": 3.2231291440399685e-06, + "loss": 0.1682, + "step": 23254, + "teacher_loss": 0.16203506290912628 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 1.0127646923065186, + "learning_rate": 3.2217226084873284e-06, + "loss": 0.3603, + "step": 23255, + "teacher_loss": 0.28783291578292847 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.29524099826812744, + "learning_rate": 3.2203163429752853e-06, + "loss": 0.2296, + "step": 23256, + "teacher_loss": 0.2222689986228943 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.23543590307235718, + "learning_rate": 3.218910347536069e-06, + "loss": 0.1237, + "step": 23257, + "teacher_loss": 0.11124849319458008 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.1931852400302887, + "learning_rate": 3.217504622201915e-06, + "loss": 0.143, + "step": 23258, + "teacher_loss": 0.13747794926166534 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.4095076024532318, + "learning_rate": 3.2160991670050535e-06, + "loss": 0.1933, + "step": 23259, + "teacher_loss": 0.1692245602607727 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.482041597366333, + "learning_rate": 3.2146939819777115e-06, + "loss": 0.2066, + "step": 23260, + "teacher_loss": 0.17600588500499725 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.7865533828735352, + "learning_rate": 3.2132890671520992e-06, + "loss": 0.2448, + "step": 23261, + "teacher_loss": 0.1845979243516922 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.6000989675521851, + "learning_rate": 3.211884422560429e-06, + "loss": 0.2525, + "step": 23262, + "teacher_loss": 0.21391861140727997 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 1.0954487323760986, + "learning_rate": 3.210480048234911e-06, + "loss": 0.2698, + "step": 23263, + "teacher_loss": 0.17809641361236572 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.3608379065990448, + "learning_rate": 3.2090759442077406e-06, + "loss": 0.2233, + "step": 23264, + "teacher_loss": 0.2079797387123108 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.4619797170162201, + "learning_rate": 3.2076721105110964e-06, + "loss": 0.2726, + "step": 23265, + "teacher_loss": 0.2515532970428467 + }, + { + "compression_loss": 0.0, + "epoch": 4.2, + "label_loss": 0.7716729640960693, + "learning_rate": 3.206268547177186e-06, + "loss": 0.2792, + "step": 23266, + "teacher_loss": 0.22444219887256622 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.7559354305267334, + "learning_rate": 3.2048652542381767e-06, + "loss": 0.2383, + "step": 23267, + "teacher_loss": 0.18077275156974792 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.4962038993835449, + "learning_rate": 3.203462231726237e-06, + "loss": 0.2798, + "step": 23268, + "teacher_loss": 0.25574052333831787 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.40254688262939453, + "learning_rate": 3.202059479673547e-06, + "loss": 0.1964, + "step": 23269, + "teacher_loss": 0.17348355054855347 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.30453866720199585, + "learning_rate": 3.200656998112263e-06, + "loss": 0.2514, + "step": 23270, + "teacher_loss": 0.24550817906856537 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.3186757564544678, + "learning_rate": 3.1992547870745326e-06, + "loss": 0.2608, + "step": 23271, + "teacher_loss": 0.25441813468933105 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 1.0288543701171875, + "learning_rate": 3.197852846592508e-06, + "loss": 0.2775, + "step": 23272, + "teacher_loss": 0.1940314769744873 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.5421605706214905, + "learning_rate": 3.1964511766983394e-06, + "loss": 0.232, + "step": 23273, + "teacher_loss": 0.19748297333717346 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 1.2474877834320068, + "learning_rate": 3.1950497774241504e-06, + "loss": 0.4592, + "step": 23274, + "teacher_loss": 0.3715963661670685 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 1.3529140949249268, + "learning_rate": 3.193648648802079e-06, + "loss": 0.2799, + "step": 23275, + "teacher_loss": 0.16063448786735535 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.6370127201080322, + "learning_rate": 3.192247790864249e-06, + "loss": 0.2675, + "step": 23276, + "teacher_loss": 0.22643055021762848 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.2137598842382431, + "learning_rate": 3.1908472036427733e-06, + "loss": 0.2129, + "step": 23277, + "teacher_loss": 0.21281951665878296 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.360918253660202, + "learning_rate": 3.1894468871697647e-06, + "loss": 0.1479, + "step": 23278, + "teacher_loss": 0.12421050667762756 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.46242764592170715, + "learning_rate": 3.188046841477332e-06, + "loss": 0.2461, + "step": 23279, + "teacher_loss": 0.22201907634735107 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.6964929103851318, + "learning_rate": 3.186647066597569e-06, + "loss": 0.2589, + "step": 23280, + "teacher_loss": 0.21029508113861084 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.38443073630332947, + "learning_rate": 3.185247562562574e-06, + "loss": 0.3303, + "step": 23281, + "teacher_loss": 0.3242555260658264 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.28113818168640137, + "learning_rate": 3.1838483294044246e-06, + "loss": 0.248, + "step": 23282, + "teacher_loss": 0.24429091811180115 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.21890994906425476, + "learning_rate": 3.1824493671552123e-06, + "loss": 0.1295, + "step": 23283, + "teacher_loss": 0.11961519718170166 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.13107125461101532, + "learning_rate": 3.1810506758469997e-06, + "loss": 0.1761, + "step": 23284, + "teacher_loss": 0.18107828497886658 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.21407577395439148, + "learning_rate": 3.179652255511861e-06, + "loss": 0.1282, + "step": 23285, + "teacher_loss": 0.11870527267456055 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.43053627014160156, + "learning_rate": 3.178254106181859e-06, + "loss": 0.1841, + "step": 23286, + "teacher_loss": 0.15671546757221222 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.36164426803588867, + "learning_rate": 3.176856227889045e-06, + "loss": 0.1909, + "step": 23287, + "teacher_loss": 0.17188404500484467 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.6132554411888123, + "learning_rate": 3.1754586206654695e-06, + "loss": 0.2177, + "step": 23288, + "teacher_loss": 0.17375633120536804 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.41749054193496704, + "learning_rate": 3.1740612845431806e-06, + "loss": 0.2477, + "step": 23289, + "teacher_loss": 0.2288322150707245 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.21687300503253937, + "learning_rate": 3.1726642195542057e-06, + "loss": 0.2279, + "step": 23290, + "teacher_loss": 0.2291523665189743 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.2819981575012207, + "learning_rate": 3.171267425730579e-06, + "loss": 0.1648, + "step": 23291, + "teacher_loss": 0.15180012583732605 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.3829386532306671, + "learning_rate": 3.169870903104332e-06, + "loss": 0.2372, + "step": 23292, + "teacher_loss": 0.22096776962280273 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.29942187666893005, + "learning_rate": 3.168474651707471e-06, + "loss": 0.2127, + "step": 23293, + "teacher_loss": 0.2031051218509674 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.44084835052490234, + "learning_rate": 3.167078671572018e-06, + "loss": 0.2754, + "step": 23294, + "teacher_loss": 0.2570471167564392 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.27950647473335266, + "learning_rate": 3.165682962729971e-06, + "loss": 0.1566, + "step": 23295, + "teacher_loss": 0.14290569722652435 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.6405006647109985, + "learning_rate": 3.1642875252133315e-06, + "loss": 0.2043, + "step": 23296, + "teacher_loss": 0.1558743715286255 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.8259952664375305, + "learning_rate": 3.162892359054098e-06, + "loss": 0.3018, + "step": 23297, + "teacher_loss": 0.24350564181804657 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.24093598127365112, + "learning_rate": 3.1614974642842493e-06, + "loss": 0.132, + "step": 23298, + "teacher_loss": 0.11984948068857193 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.28399449586868286, + "learning_rate": 3.160102840935769e-06, + "loss": 0.1869, + "step": 23299, + "teacher_loss": 0.17612066864967346 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.2547873258590698, + "learning_rate": 3.1587084890406386e-06, + "loss": 0.1779, + "step": 23300, + "teacher_loss": 0.1693544238805771 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.5891237258911133, + "learning_rate": 3.157314408630816e-06, + "loss": 0.2638, + "step": 23301, + "teacher_loss": 0.22761720418930054 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.3508014678955078, + "learning_rate": 3.15592059973827e-06, + "loss": 0.2122, + "step": 23302, + "teacher_loss": 0.19679348170757294 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.3223114311695099, + "learning_rate": 3.1545270623949526e-06, + "loss": 0.2913, + "step": 23303, + "teacher_loss": 0.2878662347793579 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.44774654507637024, + "learning_rate": 3.153133796632819e-06, + "loss": 0.1954, + "step": 23304, + "teacher_loss": 0.16733747720718384 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.16391853988170624, + "learning_rate": 3.151740802483802e-06, + "loss": 0.1794, + "step": 23305, + "teacher_loss": 0.18116486072540283 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.312136173248291, + "learning_rate": 3.1503480799798475e-06, + "loss": 0.193, + "step": 23306, + "teacher_loss": 0.17973214387893677 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.45220401883125305, + "learning_rate": 3.1489556291528883e-06, + "loss": 0.1993, + "step": 23307, + "teacher_loss": 0.17115429043769836 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.2904306650161743, + "learning_rate": 3.147563450034841e-06, + "loss": 0.1493, + "step": 23308, + "teacher_loss": 0.13363364338874817 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.8664333820343018, + "learning_rate": 3.1461715426576287e-06, + "loss": 0.2861, + "step": 23309, + "teacher_loss": 0.2216053456068039 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.1970183253288269, + "learning_rate": 3.1447799070531674e-06, + "loss": 0.1642, + "step": 23310, + "teacher_loss": 0.16052848100662231 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.4456264078617096, + "learning_rate": 3.143388543253355e-06, + "loss": 0.22, + "step": 23311, + "teacher_loss": 0.19491145014762878 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.5036458373069763, + "learning_rate": 3.1419974512900956e-06, + "loss": 0.1892, + "step": 23312, + "teacher_loss": 0.1542593240737915 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.20315611362457275, + "learning_rate": 3.1406066311952856e-06, + "loss": 0.1628, + "step": 23313, + "teacher_loss": 0.15834292769432068 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.48135173320770264, + "learning_rate": 3.1392160830008097e-06, + "loss": 0.169, + "step": 23314, + "teacher_loss": 0.13425378501415253 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.412717342376709, + "learning_rate": 3.137825806738541e-06, + "loss": 0.2068, + "step": 23315, + "teacher_loss": 0.18387356400489807 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.2765898108482361, + "learning_rate": 3.1364358024403693e-06, + "loss": 0.2047, + "step": 23316, + "teacher_loss": 0.19671399891376495 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.49432748556137085, + "learning_rate": 3.1350460701381554e-06, + "loss": 0.2564, + "step": 23317, + "teacher_loss": 0.2300100326538086 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.27979224920272827, + "learning_rate": 3.1336566098637553e-06, + "loss": 0.2189, + "step": 23318, + "teacher_loss": 0.21210408210754395 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.28960633277893066, + "learning_rate": 3.132267421649038e-06, + "loss": 0.2297, + "step": 23319, + "teacher_loss": 0.22302685678005219 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.3693777322769165, + "learning_rate": 3.1308785055258506e-06, + "loss": 0.1768, + "step": 23320, + "teacher_loss": 0.15540450811386108 + }, + { + "compression_loss": 0.0, + "epoch": 4.21, + "label_loss": 0.38619837164878845, + "learning_rate": 3.1294898615260263e-06, + "loss": 0.1737, + "step": 23321, + "teacher_loss": 0.15005414187908173 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.6213864088058472, + "learning_rate": 3.1281014896814126e-06, + "loss": 0.2326, + "step": 23322, + "teacher_loss": 0.18942174315452576 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.3797953128814697, + "learning_rate": 3.12671339002384e-06, + "loss": 0.1887, + "step": 23323, + "teacher_loss": 0.16748473048210144 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.4347117841243744, + "learning_rate": 3.125325562585128e-06, + "loss": 0.1879, + "step": 23324, + "teacher_loss": 0.16047623753547668 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.32720768451690674, + "learning_rate": 3.1239380073971e-06, + "loss": 0.2056, + "step": 23325, + "teacher_loss": 0.1921185702085495 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.2594717741012573, + "learning_rate": 3.1225507244915696e-06, + "loss": 0.1398, + "step": 23326, + "teacher_loss": 0.1264829784631729 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.29920393228530884, + "learning_rate": 3.121163713900341e-06, + "loss": 0.159, + "step": 23327, + "teacher_loss": 0.14343662559986115 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.8305924534797668, + "learning_rate": 3.119776975655206e-06, + "loss": 0.3161, + "step": 23328, + "teacher_loss": 0.25896286964416504 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.2483414113521576, + "learning_rate": 3.118390509787973e-06, + "loss": 0.1554, + "step": 23329, + "teacher_loss": 0.14505809545516968 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.3258824646472931, + "learning_rate": 3.1170043163304245e-06, + "loss": 0.1936, + "step": 23330, + "teacher_loss": 0.1788853108882904 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.2784022092819214, + "learning_rate": 3.115618395314336e-06, + "loss": 0.2354, + "step": 23331, + "teacher_loss": 0.23062962293624878 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.3397368788719177, + "learning_rate": 3.114232746771484e-06, + "loss": 0.2037, + "step": 23332, + "teacher_loss": 0.18862146139144897 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.5959842205047607, + "learning_rate": 3.1128473707336463e-06, + "loss": 0.1875, + "step": 23333, + "teacher_loss": 0.1421494483947754 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.3117603063583374, + "learning_rate": 3.1114622672325747e-06, + "loss": 0.199, + "step": 23334, + "teacher_loss": 0.18651226162910461 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.3570409119129181, + "learning_rate": 3.110077436300028e-06, + "loss": 0.2134, + "step": 23335, + "teacher_loss": 0.1973954141139984 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.4300231337547302, + "learning_rate": 3.108692877967763e-06, + "loss": 0.1775, + "step": 23336, + "teacher_loss": 0.1494424045085907 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.290812611579895, + "learning_rate": 3.107308592267514e-06, + "loss": 0.1688, + "step": 23337, + "teacher_loss": 0.1552354097366333 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.4182751774787903, + "learning_rate": 3.1059245792310224e-06, + "loss": 0.2881, + "step": 23338, + "teacher_loss": 0.27362892031669617 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.35841047763824463, + "learning_rate": 3.104540838890026e-06, + "loss": 0.2121, + "step": 23339, + "teacher_loss": 0.19583261013031006 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.2748536169528961, + "learning_rate": 3.1031573712762368e-06, + "loss": 0.2067, + "step": 23340, + "teacher_loss": 0.19914984703063965 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.22894063591957092, + "learning_rate": 3.1017741764213832e-06, + "loss": 0.1727, + "step": 23341, + "teacher_loss": 0.16640454530715942 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.569707453250885, + "learning_rate": 3.100391254357178e-06, + "loss": 0.2365, + "step": 23342, + "teacher_loss": 0.19945085048675537 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.32030758261680603, + "learning_rate": 3.0990086051153236e-06, + "loss": 0.1776, + "step": 23343, + "teacher_loss": 0.1617041975259781 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.13490645587444305, + "learning_rate": 3.0976262287275175e-06, + "loss": 0.1313, + "step": 23344, + "teacher_loss": 0.13087283074855804 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.5579209923744202, + "learning_rate": 3.096244125225457e-06, + "loss": 0.2962, + "step": 23345, + "teacher_loss": 0.26711922883987427 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.33370986580848694, + "learning_rate": 3.0948622946408317e-06, + "loss": 0.2387, + "step": 23346, + "teacher_loss": 0.2281380295753479 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.43069136142730713, + "learning_rate": 3.093480737005318e-06, + "loss": 0.1978, + "step": 23347, + "teacher_loss": 0.17197491228580475 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.1749725043773651, + "learning_rate": 3.0920994523505943e-06, + "loss": 0.1197, + "step": 23348, + "teacher_loss": 0.11355361342430115 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.20738239586353302, + "learning_rate": 3.090718440708331e-06, + "loss": 0.1404, + "step": 23349, + "teacher_loss": 0.13300752639770508 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.468522310256958, + "learning_rate": 3.0893377021101854e-06, + "loss": 0.2255, + "step": 23350, + "teacher_loss": 0.19853244721889496 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.30323949456214905, + "learning_rate": 3.0879572365878152e-06, + "loss": 0.1649, + "step": 23351, + "teacher_loss": 0.1495271772146225 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.5940144062042236, + "learning_rate": 3.0865770441728747e-06, + "loss": 0.282, + "step": 23352, + "teacher_loss": 0.24735905230045319 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.27005207538604736, + "learning_rate": 3.0851971248970005e-06, + "loss": 0.1364, + "step": 23353, + "teacher_loss": 0.12155468761920929 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.27472546696662903, + "learning_rate": 3.0838174787918395e-06, + "loss": 0.1369, + "step": 23354, + "teacher_loss": 0.12157252430915833 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.2313234806060791, + "learning_rate": 3.0824381058890123e-06, + "loss": 0.2621, + "step": 23355, + "teacher_loss": 0.26557034254074097 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.3240395188331604, + "learning_rate": 3.0810590062201487e-06, + "loss": 0.1828, + "step": 23356, + "teacher_loss": 0.1671273410320282 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.6955951452255249, + "learning_rate": 3.0796801798168707e-06, + "loss": 0.2239, + "step": 23357, + "teacher_loss": 0.17153066396713257 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.4184282422065735, + "learning_rate": 3.078301626710782e-06, + "loss": 0.2141, + "step": 23358, + "teacher_loss": 0.19143730401992798 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.424055278301239, + "learning_rate": 3.0769233469334967e-06, + "loss": 0.2649, + "step": 23359, + "teacher_loss": 0.24721673130989075 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.43032824993133545, + "learning_rate": 3.075545340516614e-06, + "loss": 0.2277, + "step": 23360, + "teacher_loss": 0.2052096128463745 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.4023629426956177, + "learning_rate": 3.0741676074917217e-06, + "loss": 0.2298, + "step": 23361, + "teacher_loss": 0.2106795608997345 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.2856014668941498, + "learning_rate": 3.0727901478904103e-06, + "loss": 0.2378, + "step": 23362, + "teacher_loss": 0.23252660036087036 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.5177009701728821, + "learning_rate": 3.0714129617442652e-06, + "loss": 0.2168, + "step": 23363, + "teacher_loss": 0.18339797854423523 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.591826319694519, + "learning_rate": 3.0700360490848588e-06, + "loss": 0.2162, + "step": 23364, + "teacher_loss": 0.17446091771125793 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.393990695476532, + "learning_rate": 3.0686594099437522e-06, + "loss": 0.1877, + "step": 23365, + "teacher_loss": 0.1647799015045166 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.4505118727684021, + "learning_rate": 3.067283044352513e-06, + "loss": 0.2317, + "step": 23366, + "teacher_loss": 0.2074400782585144 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.5250614881515503, + "learning_rate": 3.065906952342703e-06, + "loss": 0.2482, + "step": 23367, + "teacher_loss": 0.2174263298511505 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.2600533366203308, + "learning_rate": 3.064531133945862e-06, + "loss": 0.157, + "step": 23368, + "teacher_loss": 0.14558903872966766 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.6682767271995544, + "learning_rate": 3.0631555891935363e-06, + "loss": 0.2881, + "step": 23369, + "teacher_loss": 0.24584899842739105 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.30913203954696655, + "learning_rate": 3.061780318117269e-06, + "loss": 0.1823, + "step": 23370, + "teacher_loss": 0.1681535542011261 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.7967884540557861, + "learning_rate": 3.0604053207485838e-06, + "loss": 0.3123, + "step": 23371, + "teacher_loss": 0.25841328501701355 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.5161321759223938, + "learning_rate": 3.059030597119006e-06, + "loss": 0.2631, + "step": 23372, + "teacher_loss": 0.23493850231170654 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.33540183305740356, + "learning_rate": 3.05765614726006e-06, + "loss": 0.187, + "step": 23373, + "teacher_loss": 0.17056095600128174 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.5006957054138184, + "learning_rate": 3.0562819712032513e-06, + "loss": 0.2427, + "step": 23374, + "teacher_loss": 0.214012011885643 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.5709456205368042, + "learning_rate": 3.054908068980085e-06, + "loss": 0.3275, + "step": 23375, + "teacher_loss": 0.30042925477027893 + }, + { + "compression_loss": 0.0, + "epoch": 4.22, + "label_loss": 0.36636725068092346, + "learning_rate": 3.0535344406220695e-06, + "loss": 0.2296, + "step": 23376, + "teacher_loss": 0.21442541480064392 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.3970506191253662, + "learning_rate": 3.0521610861606926e-06, + "loss": 0.2222, + "step": 23377, + "teacher_loss": 0.20282377302646637 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.5968098640441895, + "learning_rate": 3.050788005627431e-06, + "loss": 0.2538, + "step": 23378, + "teacher_loss": 0.2156357318162918 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.4083077311515808, + "learning_rate": 3.049415199053782e-06, + "loss": 0.2184, + "step": 23379, + "teacher_loss": 0.19726793467998505 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.2398323267698288, + "learning_rate": 3.0480426664712153e-06, + "loss": 0.1646, + "step": 23380, + "teacher_loss": 0.15623663365840912 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.3097718358039856, + "learning_rate": 3.0466704079111913e-06, + "loss": 0.1497, + "step": 23381, + "teacher_loss": 0.13187578320503235 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.21565264463424683, + "learning_rate": 3.045298423405175e-06, + "loss": 0.2184, + "step": 23382, + "teacher_loss": 0.2186783105134964 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.3526037335395813, + "learning_rate": 3.0439267129846297e-06, + "loss": 0.1984, + "step": 23383, + "teacher_loss": 0.18125870823860168 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.4021896719932556, + "learning_rate": 3.0425552766809957e-06, + "loss": 0.2102, + "step": 23384, + "teacher_loss": 0.18887048959732056 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.3450854420661926, + "learning_rate": 3.041184114525717e-06, + "loss": 0.1842, + "step": 23385, + "teacher_loss": 0.1662786304950714 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.29741767048835754, + "learning_rate": 3.0398132265502373e-06, + "loss": 0.1874, + "step": 23386, + "teacher_loss": 0.17521792650222778 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.47177982330322266, + "learning_rate": 3.038442612785981e-06, + "loss": 0.2488, + "step": 23387, + "teacher_loss": 0.2240654081106186 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.09591615200042725, + "learning_rate": 3.037072273264365e-06, + "loss": 0.1572, + "step": 23388, + "teacher_loss": 0.16404278576374054 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.2959417700767517, + "learning_rate": 3.0357022080168246e-06, + "loss": 0.2325, + "step": 23389, + "teacher_loss": 0.22539573907852173 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.5060272216796875, + "learning_rate": 3.0343324170747605e-06, + "loss": 0.2102, + "step": 23390, + "teacher_loss": 0.1773017942905426 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.22305548191070557, + "learning_rate": 3.0329629004695715e-06, + "loss": 0.12, + "step": 23391, + "teacher_loss": 0.10858811438083649 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.2350330352783203, + "learning_rate": 3.03159365823267e-06, + "loss": 0.1733, + "step": 23392, + "teacher_loss": 0.16648122668266296 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.13447819650173187, + "learning_rate": 3.0302246903954456e-06, + "loss": 0.1698, + "step": 23393, + "teacher_loss": 0.17375323176383972 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.6864510774612427, + "learning_rate": 3.0288559969892773e-06, + "loss": 0.3051, + "step": 23394, + "teacher_loss": 0.26276645064353943 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.4945695996284485, + "learning_rate": 3.0274875780455478e-06, + "loss": 0.2678, + "step": 23395, + "teacher_loss": 0.24258771538734436 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.6094739437103271, + "learning_rate": 3.0261194335956384e-06, + "loss": 0.2003, + "step": 23396, + "teacher_loss": 0.15486091375350952 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.29347124695777893, + "learning_rate": 3.0247515636709077e-06, + "loss": 0.2107, + "step": 23397, + "teacher_loss": 0.2015039324760437 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.33906489610671997, + "learning_rate": 3.023383968302718e-06, + "loss": 0.1807, + "step": 23398, + "teacher_loss": 0.16311398148536682 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.8298303484916687, + "learning_rate": 3.022016647522431e-06, + "loss": 0.2633, + "step": 23399, + "teacher_loss": 0.20037055015563965 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.6456891298294067, + "learning_rate": 3.020649601361386e-06, + "loss": 0.3382, + "step": 23400, + "teacher_loss": 0.304018497467041 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.31144827604293823, + "learning_rate": 3.01928282985093e-06, + "loss": 0.2068, + "step": 23401, + "teacher_loss": 0.1951437145471573 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.4323830306529999, + "learning_rate": 3.017916333022403e-06, + "loss": 0.1992, + "step": 23402, + "teacher_loss": 0.17325958609580994 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.2369326949119568, + "learning_rate": 3.0165501109071263e-06, + "loss": 0.2104, + "step": 23403, + "teacher_loss": 0.20749524235725403 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.35950973629951477, + "learning_rate": 3.0151841635364304e-06, + "loss": 0.1902, + "step": 23404, + "teacher_loss": 0.1713523268699646 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.3136492967605591, + "learning_rate": 3.013818490941626e-06, + "loss": 0.2182, + "step": 23405, + "teacher_loss": 0.20759734511375427 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.5813215970993042, + "learning_rate": 3.0124530931540325e-06, + "loss": 0.2478, + "step": 23406, + "teacher_loss": 0.21069425344467163 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.37100541591644287, + "learning_rate": 3.011087970204942e-06, + "loss": 0.2366, + "step": 23407, + "teacher_loss": 0.22165217995643616 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.39068782329559326, + "learning_rate": 3.009723122125662e-06, + "loss": 0.1696, + "step": 23408, + "teacher_loss": 0.14508675038814545 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.6514219641685486, + "learning_rate": 3.008358548947486e-06, + "loss": 0.2707, + "step": 23409, + "teacher_loss": 0.22835254669189453 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.3497718572616577, + "learning_rate": 3.0069942507016913e-06, + "loss": 0.2642, + "step": 23410, + "teacher_loss": 0.2547234892845154 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.35071495175361633, + "learning_rate": 3.00563022741956e-06, + "loss": 0.1666, + "step": 23411, + "teacher_loss": 0.14615866541862488 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.39620763063430786, + "learning_rate": 3.0042664791323724e-06, + "loss": 0.2217, + "step": 23412, + "teacher_loss": 0.20229363441467285 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.856957197189331, + "learning_rate": 3.002903005871385e-06, + "loss": 0.3126, + "step": 23413, + "teacher_loss": 0.2520948052406311 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.32809919118881226, + "learning_rate": 3.001539807667863e-06, + "loss": 0.1934, + "step": 23414, + "teacher_loss": 0.17840775847434998 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.16026541590690613, + "learning_rate": 3.0001768845530626e-06, + "loss": 0.2411, + "step": 23415, + "teacher_loss": 0.2500517964363098 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.22602596879005432, + "learning_rate": 2.9988142365582256e-06, + "loss": 0.2008, + "step": 23416, + "teacher_loss": 0.19803878664970398 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.5577503442764282, + "learning_rate": 2.997451863714602e-06, + "loss": 0.2244, + "step": 23417, + "teacher_loss": 0.18734243512153625 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.8251248598098755, + "learning_rate": 2.9960897660534155e-06, + "loss": 0.2436, + "step": 23418, + "teacher_loss": 0.1789398491382599 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 1.0075130462646484, + "learning_rate": 2.9947279436059023e-06, + "loss": 0.2727, + "step": 23419, + "teacher_loss": 0.19105108082294464 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.5820173621177673, + "learning_rate": 2.9933663964032877e-06, + "loss": 0.2449, + "step": 23420, + "teacher_loss": 0.20742951333522797 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.6726857423782349, + "learning_rate": 2.99200512447678e-06, + "loss": 0.2352, + "step": 23421, + "teacher_loss": 0.18657097220420837 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.5102705955505371, + "learning_rate": 2.9906441278575924e-06, + "loss": 0.2191, + "step": 23422, + "teacher_loss": 0.1867186725139618 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.29545438289642334, + "learning_rate": 2.989283406576932e-06, + "loss": 0.1853, + "step": 23423, + "teacher_loss": 0.1730896234512329 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.606796383857727, + "learning_rate": 2.9879229606659903e-06, + "loss": 0.1818, + "step": 23424, + "teacher_loss": 0.13458675146102905 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.16351720690727234, + "learning_rate": 2.9865627901559623e-06, + "loss": 0.15, + "step": 23425, + "teacher_loss": 0.1484694480895996 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.23135894536972046, + "learning_rate": 2.985202895078033e-06, + "loss": 0.2008, + "step": 23426, + "teacher_loss": 0.19740816950798035 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.45961982011795044, + "learning_rate": 2.983843275463381e-06, + "loss": 0.2194, + "step": 23427, + "teacher_loss": 0.19275733828544617 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.35334160923957825, + "learning_rate": 2.9824839313431712e-06, + "loss": 0.1806, + "step": 23428, + "teacher_loss": 0.16142696142196655 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.44127005338668823, + "learning_rate": 2.9811248627485754e-06, + "loss": 0.2098, + "step": 23429, + "teacher_loss": 0.18412485718727112 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.2662040591239929, + "learning_rate": 2.9797660697107554e-06, + "loss": 0.1837, + "step": 23430, + "teacher_loss": 0.174494206905365 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.34847182035446167, + "learning_rate": 2.978407552260856e-06, + "loss": 0.2067, + "step": 23431, + "teacher_loss": 0.19097909331321716 + }, + { + "compression_loss": 0.0, + "epoch": 4.23, + "label_loss": 0.18753407895565033, + "learning_rate": 2.977049310430031e-06, + "loss": 0.1502, + "step": 23432, + "teacher_loss": 0.14602018892765045 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.37232252955436707, + "learning_rate": 2.9756913442494204e-06, + "loss": 0.18, + "step": 23433, + "teacher_loss": 0.15865576267242432 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.5214018821716309, + "learning_rate": 2.974333653750154e-06, + "loss": 0.1875, + "step": 23434, + "teacher_loss": 0.15038429200649261 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.3771612048149109, + "learning_rate": 2.9729762389633625e-06, + "loss": 0.3124, + "step": 23435, + "teacher_loss": 0.3051683306694031 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.3065650463104248, + "learning_rate": 2.9716190999201687e-06, + "loss": 0.2311, + "step": 23436, + "teacher_loss": 0.2226601541042328 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.7297235131263733, + "learning_rate": 2.970262236651688e-06, + "loss": 0.275, + "step": 23437, + "teacher_loss": 0.22447621822357178 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.6516412496566772, + "learning_rate": 2.968905649189019e-06, + "loss": 0.214, + "step": 23438, + "teacher_loss": 0.1653345823287964 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.35279861092567444, + "learning_rate": 2.9675493375632796e-06, + "loss": 0.225, + "step": 23439, + "teacher_loss": 0.21080461144447327 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.37092798948287964, + "learning_rate": 2.9661933018055586e-06, + "loss": 0.273, + "step": 23440, + "teacher_loss": 0.26208558678627014 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.5320628881454468, + "learning_rate": 2.9648375419469397e-06, + "loss": 0.2064, + "step": 23441, + "teacher_loss": 0.17024663090705872 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.1468735933303833, + "learning_rate": 2.9634820580185195e-06, + "loss": 0.1586, + "step": 23442, + "teacher_loss": 0.15995411574840546 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.2772144675254822, + "learning_rate": 2.962126850051368e-06, + "loss": 0.148, + "step": 23443, + "teacher_loss": 0.13365775346755981 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.6996981501579285, + "learning_rate": 2.9607719180765524e-06, + "loss": 0.4558, + "step": 23444, + "teacher_loss": 0.4287361800670624 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.765461802482605, + "learning_rate": 2.9594172621251408e-06, + "loss": 0.2478, + "step": 23445, + "teacher_loss": 0.19028347730636597 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.4473523795604706, + "learning_rate": 2.958062882228197e-06, + "loss": 0.1918, + "step": 23446, + "teacher_loss": 0.16336822509765625 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.3348191976547241, + "learning_rate": 2.9567087784167642e-06, + "loss": 0.2289, + "step": 23447, + "teacher_loss": 0.21716171503067017 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.1583988219499588, + "learning_rate": 2.9553549507218893e-06, + "loss": 0.1777, + "step": 23448, + "teacher_loss": 0.17988064885139465 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.765503466129303, + "learning_rate": 2.954001399174619e-06, + "loss": 0.2211, + "step": 23449, + "teacher_loss": 0.1606222540140152 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.29384398460388184, + "learning_rate": 2.9526481238059803e-06, + "loss": 0.2113, + "step": 23450, + "teacher_loss": 0.20210814476013184 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.4396364092826843, + "learning_rate": 2.951295124646994e-06, + "loss": 0.2042, + "step": 23451, + "teacher_loss": 0.17804238200187683 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.13660049438476562, + "learning_rate": 2.9499424017286926e-06, + "loss": 0.1589, + "step": 23452, + "teacher_loss": 0.1613357961177826 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.5394812822341919, + "learning_rate": 2.948589955082085e-06, + "loss": 0.1788, + "step": 23453, + "teacher_loss": 0.13869966566562653 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.19466203451156616, + "learning_rate": 2.9472377847381733e-06, + "loss": 0.1793, + "step": 23454, + "teacher_loss": 0.17760831117630005 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.4476737678050995, + "learning_rate": 2.945885890727964e-06, + "loss": 0.1966, + "step": 23455, + "teacher_loss": 0.16869601607322693 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.2089543342590332, + "learning_rate": 2.944534273082454e-06, + "loss": 0.1628, + "step": 23456, + "teacher_loss": 0.1577066034078598 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.22364526987075806, + "learning_rate": 2.943182931832626e-06, + "loss": 0.1641, + "step": 23457, + "teacher_loss": 0.15749265253543854 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.5768837928771973, + "learning_rate": 2.941831867009464e-06, + "loss": 0.2473, + "step": 23458, + "teacher_loss": 0.21065039932727814 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.14140355587005615, + "learning_rate": 2.940481078643949e-06, + "loss": 0.1988, + "step": 23459, + "teacher_loss": 0.20523279905319214 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.744377076625824, + "learning_rate": 2.9391305667670443e-06, + "loss": 0.2079, + "step": 23460, + "teacher_loss": 0.14824740588665009 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.8212010264396667, + "learning_rate": 2.9377803314097136e-06, + "loss": 0.2824, + "step": 23461, + "teacher_loss": 0.22252118587493896 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.4029175043106079, + "learning_rate": 2.93643037260292e-06, + "loss": 0.2731, + "step": 23462, + "teacher_loss": 0.2586822211742401 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.5088284015655518, + "learning_rate": 2.935080690377606e-06, + "loss": 0.2472, + "step": 23463, + "teacher_loss": 0.2181018590927124 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.38877072930336, + "learning_rate": 2.9337312847647187e-06, + "loss": 0.2055, + "step": 23464, + "teacher_loss": 0.18508122861385345 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.31336140632629395, + "learning_rate": 2.9323821557952007e-06, + "loss": 0.1863, + "step": 23465, + "teacher_loss": 0.1722373068332672 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.5628397464752197, + "learning_rate": 2.931033303499975e-06, + "loss": 0.3491, + "step": 23466, + "teacher_loss": 0.32539820671081543 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.49442926049232483, + "learning_rate": 2.929684727909974e-06, + "loss": 0.2618, + "step": 23467, + "teacher_loss": 0.23598593473434448 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.3347659707069397, + "learning_rate": 2.928336429056111e-06, + "loss": 0.2031, + "step": 23468, + "teacher_loss": 0.18851131200790405 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.3287886381149292, + "learning_rate": 2.9269884069693053e-06, + "loss": 0.1659, + "step": 23469, + "teacher_loss": 0.1477912813425064 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.5561468005180359, + "learning_rate": 2.925640661680454e-06, + "loss": 0.244, + "step": 23470, + "teacher_loss": 0.20930270850658417 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.1431950330734253, + "learning_rate": 2.9242931932204614e-06, + "loss": 0.1461, + "step": 23471, + "teacher_loss": 0.14641883969306946 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.5328336954116821, + "learning_rate": 2.9229460016202254e-06, + "loss": 0.2277, + "step": 23472, + "teacher_loss": 0.19380444288253784 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.6786975860595703, + "learning_rate": 2.9215990869106248e-06, + "loss": 0.2125, + "step": 23473, + "teacher_loss": 0.1606781780719757 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.10365578532218933, + "learning_rate": 2.920252449122543e-06, + "loss": 0.1479, + "step": 23474, + "teacher_loss": 0.15279698371887207 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.28274479508399963, + "learning_rate": 2.9189060882868606e-06, + "loss": 0.2363, + "step": 23475, + "teacher_loss": 0.23114481568336487 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.339595764875412, + "learning_rate": 2.9175600044344364e-06, + "loss": 0.1642, + "step": 23476, + "teacher_loss": 0.1447519212961197 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.1088632345199585, + "learning_rate": 2.9162141975961404e-06, + "loss": 0.1823, + "step": 23477, + "teacher_loss": 0.1904122531414032 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.5974847078323364, + "learning_rate": 2.9148686678028197e-06, + "loss": 0.5551, + "step": 23478, + "teacher_loss": 0.55040442943573 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.4665134847164154, + "learning_rate": 2.913523415085328e-06, + "loss": 0.23, + "step": 23479, + "teacher_loss": 0.20372800529003143 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.2874928116798401, + "learning_rate": 2.9121784394745105e-06, + "loss": 0.1422, + "step": 23480, + "teacher_loss": 0.126010924577713 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.5245132446289062, + "learning_rate": 2.910833741001196e-06, + "loss": 0.2764, + "step": 23481, + "teacher_loss": 0.24888616800308228 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.21251609921455383, + "learning_rate": 2.9094893196962197e-06, + "loss": 0.1747, + "step": 23482, + "teacher_loss": 0.17052951455116272 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.24154171347618103, + "learning_rate": 2.908145175590407e-06, + "loss": 0.1326, + "step": 23483, + "teacher_loss": 0.12048736214637756 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.5144477486610413, + "learning_rate": 2.9068013087145684e-06, + "loss": 0.3048, + "step": 23484, + "teacher_loss": 0.2815244495868683 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.4105534851551056, + "learning_rate": 2.9054577190995175e-06, + "loss": 0.2053, + "step": 23485, + "teacher_loss": 0.1824992597103119 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.42366823554039, + "learning_rate": 2.9041144067760662e-06, + "loss": 0.1809, + "step": 23486, + "teacher_loss": 0.15392887592315674 + }, + { + "compression_loss": 0.0, + "epoch": 4.24, + "label_loss": 0.7007098197937012, + "learning_rate": 2.9027713717750046e-06, + "loss": 0.2716, + "step": 23487, + "teacher_loss": 0.22394943237304688 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.2165856659412384, + "learning_rate": 2.9014286141271173e-06, + "loss": 0.1716, + "step": 23488, + "teacher_loss": 0.16656649112701416 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.23297366499900818, + "learning_rate": 2.9000861338632074e-06, + "loss": 0.2154, + "step": 23489, + "teacher_loss": 0.21348971128463745 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.4030412435531616, + "learning_rate": 2.898743931014045e-06, + "loss": 0.2045, + "step": 23490, + "teacher_loss": 0.18242451548576355 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.4813503921031952, + "learning_rate": 2.8974020056103994e-06, + "loss": 0.1929, + "step": 23491, + "teacher_loss": 0.16082313656806946 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.7757197618484497, + "learning_rate": 2.8960603576830408e-06, + "loss": 0.2883, + "step": 23492, + "teacher_loss": 0.23414504528045654 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.15720973908901215, + "learning_rate": 2.894718987262731e-06, + "loss": 0.2029, + "step": 23493, + "teacher_loss": 0.20799612998962402 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.2559800148010254, + "learning_rate": 2.8933778943802193e-06, + "loss": 0.1612, + "step": 23494, + "teacher_loss": 0.15063440799713135 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.5785425901412964, + "learning_rate": 2.892037079066252e-06, + "loss": 0.4117, + "step": 23495, + "teacher_loss": 0.39316946268081665 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.8459206819534302, + "learning_rate": 2.8906965413515803e-06, + "loss": 0.2007, + "step": 23496, + "teacher_loss": 0.12906435132026672 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.08664538711309433, + "learning_rate": 2.8893562812669254e-06, + "loss": 0.1448, + "step": 23497, + "teacher_loss": 0.15126188099384308 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.35848987102508545, + "learning_rate": 2.888016298843022e-06, + "loss": 0.161, + "step": 23498, + "teacher_loss": 0.13903038203716278 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.5023984313011169, + "learning_rate": 2.886676594110595e-06, + "loss": 0.2098, + "step": 23499, + "teacher_loss": 0.17725443840026855 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.636722207069397, + "learning_rate": 2.8853371671003547e-06, + "loss": 0.2279, + "step": 23500, + "teacher_loss": 0.1824699342250824 + }, + { + "epoch": 4.25, + "eval_exact_match": 80.47303689687796, + "eval_f1": 87.72751099830998, + "step": 23500 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.17988750338554382, + "learning_rate": 2.8839980178430055e-06, + "loss": 0.1606, + "step": 23501, + "teacher_loss": 0.1584232747554779 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.20040926337242126, + "learning_rate": 2.8826591463692638e-06, + "loss": 0.1946, + "step": 23502, + "teacher_loss": 0.193989560008049 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.6451300382614136, + "learning_rate": 2.881320552709817e-06, + "loss": 0.2256, + "step": 23503, + "teacher_loss": 0.1790080964565277 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.4379386305809021, + "learning_rate": 2.879982236895354e-06, + "loss": 0.1983, + "step": 23504, + "teacher_loss": 0.17168211936950684 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.8508151769638062, + "learning_rate": 2.878644198956558e-06, + "loss": 0.2294, + "step": 23505, + "teacher_loss": 0.16032646596431732 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.18794825673103333, + "learning_rate": 2.8773064389241153e-06, + "loss": 0.1556, + "step": 23506, + "teacher_loss": 0.1519886553287506 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.35050997138023376, + "learning_rate": 2.8759689568286863e-06, + "loss": 0.2118, + "step": 23507, + "teacher_loss": 0.19633370637893677 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.6564970016479492, + "learning_rate": 2.8746317527009375e-06, + "loss": 0.2841, + "step": 23508, + "teacher_loss": 0.24274101853370667 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.3475725054740906, + "learning_rate": 2.873294826571535e-06, + "loss": 0.1603, + "step": 23509, + "teacher_loss": 0.13952046632766724 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.4035768210887909, + "learning_rate": 2.8719581784711217e-06, + "loss": 0.221, + "step": 23510, + "teacher_loss": 0.20070582628250122 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.3082427382469177, + "learning_rate": 2.8706218084303393e-06, + "loss": 0.198, + "step": 23511, + "teacher_loss": 0.18576562404632568 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.45154741406440735, + "learning_rate": 2.869285716479841e-06, + "loss": 0.2966, + "step": 23512, + "teacher_loss": 0.279381662607193 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.37201517820358276, + "learning_rate": 2.867949902650252e-06, + "loss": 0.2036, + "step": 23513, + "teacher_loss": 0.18483304977416992 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.3767436742782593, + "learning_rate": 2.86661436697219e-06, + "loss": 0.1954, + "step": 23514, + "teacher_loss": 0.1752319037914276 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.3800477981567383, + "learning_rate": 2.8652791094762886e-06, + "loss": 0.1916, + "step": 23515, + "teacher_loss": 0.1706707924604416 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.4575875997543335, + "learning_rate": 2.8639441301931587e-06, + "loss": 0.2909, + "step": 23516, + "teacher_loss": 0.2723338007926941 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.9758557677268982, + "learning_rate": 2.8626094291533965e-06, + "loss": 0.2709, + "step": 23517, + "teacher_loss": 0.19253957271575928 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.3562992513179779, + "learning_rate": 2.8612750063876135e-06, + "loss": 0.1919, + "step": 23518, + "teacher_loss": 0.17361171543598175 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.41558390855789185, + "learning_rate": 2.8599408619264036e-06, + "loss": 0.2252, + "step": 23519, + "teacher_loss": 0.20408935844898224 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.20573261380195618, + "learning_rate": 2.858606995800348e-06, + "loss": 0.177, + "step": 23520, + "teacher_loss": 0.17384850978851318 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.37960296869277954, + "learning_rate": 2.8572734080400315e-06, + "loss": 0.2047, + "step": 23521, + "teacher_loss": 0.18526498973369598 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.6667327880859375, + "learning_rate": 2.8559400986760344e-06, + "loss": 0.3079, + "step": 23522, + "teacher_loss": 0.2679884135723114 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.5486441254615784, + "learning_rate": 2.854607067738917e-06, + "loss": 0.2358, + "step": 23523, + "teacher_loss": 0.20101585984230042 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.47195035219192505, + "learning_rate": 2.8532743152592467e-06, + "loss": 0.1769, + "step": 23524, + "teacher_loss": 0.1441117227077484 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.38555091619491577, + "learning_rate": 2.851941841267581e-06, + "loss": 0.2055, + "step": 23525, + "teacher_loss": 0.18544459342956543 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.20950210094451904, + "learning_rate": 2.850609645794465e-06, + "loss": 0.219, + "step": 23526, + "teacher_loss": 0.22002992033958435 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.26769882440567017, + "learning_rate": 2.849277728870446e-06, + "loss": 0.2137, + "step": 23527, + "teacher_loss": 0.2076951563358307 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.48645126819610596, + "learning_rate": 2.847946090526056e-06, + "loss": 0.2054, + "step": 23528, + "teacher_loss": 0.17412686347961426 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.36318206787109375, + "learning_rate": 2.8466147307918283e-06, + "loss": 0.245, + "step": 23529, + "teacher_loss": 0.23185402154922485 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.4255761504173279, + "learning_rate": 2.845283649698291e-06, + "loss": 0.2257, + "step": 23530, + "teacher_loss": 0.2034575641155243 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.2936986982822418, + "learning_rate": 2.843952847275955e-06, + "loss": 0.2339, + "step": 23531, + "teacher_loss": 0.2272147536277771 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.4340837895870209, + "learning_rate": 2.8426223235553367e-06, + "loss": 0.2342, + "step": 23532, + "teacher_loss": 0.2120259553194046 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.19881585240364075, + "learning_rate": 2.841292078566936e-06, + "loss": 0.1729, + "step": 23533, + "teacher_loss": 0.17004451155662537 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.6113169193267822, + "learning_rate": 2.839962112341253e-06, + "loss": 0.2142, + "step": 23534, + "teacher_loss": 0.1700935661792755 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.09825917333364487, + "learning_rate": 2.838632424908786e-06, + "loss": 0.1861, + "step": 23535, + "teacher_loss": 0.19589583575725555 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.3130182921886444, + "learning_rate": 2.8373030163000126e-06, + "loss": 0.1466, + "step": 23536, + "teacher_loss": 0.12810730934143066 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.13756752014160156, + "learning_rate": 2.835973886545414e-06, + "loss": 0.1571, + "step": 23537, + "teacher_loss": 0.1592382788658142 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.2803334593772888, + "learning_rate": 2.834645035675469e-06, + "loss": 0.2013, + "step": 23538, + "teacher_loss": 0.1924774944782257 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.5822422504425049, + "learning_rate": 2.8333164637206367e-06, + "loss": 0.3283, + "step": 23539, + "teacher_loss": 0.30004915595054626 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.3407885432243347, + "learning_rate": 2.8319881707113825e-06, + "loss": 0.2123, + "step": 23540, + "teacher_loss": 0.1980486661195755 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.46780824661254883, + "learning_rate": 2.8306601566781542e-06, + "loss": 0.2421, + "step": 23541, + "teacher_loss": 0.21704059839248657 + }, + { + "compression_loss": 0.0, + "epoch": 4.25, + "label_loss": 0.47814232110977173, + "learning_rate": 2.829332421651404e-06, + "loss": 0.1951, + "step": 23542, + "teacher_loss": 0.163700670003891 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.15735790133476257, + "learning_rate": 2.8280049656615755e-06, + "loss": 0.1926, + "step": 23543, + "teacher_loss": 0.19653186202049255 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.3475435972213745, + "learning_rate": 2.826677788739096e-06, + "loss": 0.1892, + "step": 23544, + "teacher_loss": 0.1716184914112091 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.4281400144100189, + "learning_rate": 2.8253508909143967e-06, + "loss": 0.2027, + "step": 23545, + "teacher_loss": 0.17761413753032684 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.29032742977142334, + "learning_rate": 2.8240242722179042e-06, + "loss": 0.2031, + "step": 23546, + "teacher_loss": 0.19338306784629822 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.3970162272453308, + "learning_rate": 2.822697932680025e-06, + "loss": 0.1833, + "step": 23547, + "teacher_loss": 0.15956871211528778 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.26178571581840515, + "learning_rate": 2.8213718723311728e-06, + "loss": 0.2235, + "step": 23548, + "teacher_loss": 0.21921241283416748 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.49028852581977844, + "learning_rate": 2.8200460912017545e-06, + "loss": 0.2256, + "step": 23549, + "teacher_loss": 0.19617339968681335 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.29364901781082153, + "learning_rate": 2.818720589322163e-06, + "loss": 0.1964, + "step": 23550, + "teacher_loss": 0.18561695516109467 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.5711995363235474, + "learning_rate": 2.817395366722782e-06, + "loss": 0.251, + "step": 23551, + "teacher_loss": 0.21544548869132996 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.39511638879776, + "learning_rate": 2.816070423433999e-06, + "loss": 0.2, + "step": 23552, + "teacher_loss": 0.1782768964767456 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.2171490341424942, + "learning_rate": 2.814745759486198e-06, + "loss": 0.1814, + "step": 23553, + "teacher_loss": 0.17737993597984314 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.4558674693107605, + "learning_rate": 2.8134213749097382e-06, + "loss": 0.2352, + "step": 23554, + "teacher_loss": 0.21068216860294342 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.4121059775352478, + "learning_rate": 2.8120972697349886e-06, + "loss": 0.1769, + "step": 23555, + "teacher_loss": 0.15071095526218414 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.3422475755214691, + "learning_rate": 2.810773443992313e-06, + "loss": 0.222, + "step": 23556, + "teacher_loss": 0.20867910981178284 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.32564103603363037, + "learning_rate": 2.8094498977120504e-06, + "loss": 0.1964, + "step": 23557, + "teacher_loss": 0.18203064799308777 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.2715091109275818, + "learning_rate": 2.808126630924555e-06, + "loss": 0.2567, + "step": 23558, + "teacher_loss": 0.2550812363624573 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.5449018478393555, + "learning_rate": 2.806803643660166e-06, + "loss": 0.1872, + "step": 23559, + "teacher_loss": 0.1474830061197281 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.43778133392333984, + "learning_rate": 2.805480935949211e-06, + "loss": 0.2486, + "step": 23560, + "teacher_loss": 0.2275719940662384 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.6413131952285767, + "learning_rate": 2.8041585078220105e-06, + "loss": 0.2427, + "step": 23561, + "teacher_loss": 0.19837774336338043 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.3337388038635254, + "learning_rate": 2.8028363593088985e-06, + "loss": 0.2628, + "step": 23562, + "teacher_loss": 0.2548964321613312 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.5354351997375488, + "learning_rate": 2.801514490440179e-06, + "loss": 0.1855, + "step": 23563, + "teacher_loss": 0.14658969640731812 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.24030691385269165, + "learning_rate": 2.800192901246153e-06, + "loss": 0.1664, + "step": 23564, + "teacher_loss": 0.15816043317317963 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.9059640765190125, + "learning_rate": 2.7988715917571327e-06, + "loss": 0.2796, + "step": 23565, + "teacher_loss": 0.20996958017349243 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.47720617055892944, + "learning_rate": 2.7975505620034073e-06, + "loss": 0.2224, + "step": 23566, + "teacher_loss": 0.19410720467567444 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.3822779059410095, + "learning_rate": 2.7962298120152578e-06, + "loss": 0.1807, + "step": 23567, + "teacher_loss": 0.15834113955497742 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.4422036409378052, + "learning_rate": 2.7949093418229717e-06, + "loss": 0.2449, + "step": 23568, + "teacher_loss": 0.22297346591949463 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.35386139154434204, + "learning_rate": 2.793589151456824e-06, + "loss": 0.1909, + "step": 23569, + "teacher_loss": 0.1728121042251587 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.2762065827846527, + "learning_rate": 2.792269240947076e-06, + "loss": 0.1712, + "step": 23570, + "teacher_loss": 0.1594999134540558 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.25592854619026184, + "learning_rate": 2.790949610323994e-06, + "loss": 0.1631, + "step": 23571, + "teacher_loss": 0.15273168683052063 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.791628360748291, + "learning_rate": 2.789630259617838e-06, + "loss": 0.2369, + "step": 23572, + "teacher_loss": 0.17526771128177643 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.4047967791557312, + "learning_rate": 2.7883111888588507e-06, + "loss": 0.2387, + "step": 23573, + "teacher_loss": 0.22019270062446594 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.3563660979270935, + "learning_rate": 2.7869923980772667e-06, + "loss": 0.1992, + "step": 23574, + "teacher_loss": 0.1817789375782013 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.9154502153396606, + "learning_rate": 2.7856738873033395e-06, + "loss": 0.2635, + "step": 23575, + "teacher_loss": 0.1910582184791565 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.861147403717041, + "learning_rate": 2.7843556565672885e-06, + "loss": 0.298, + "step": 23576, + "teacher_loss": 0.23542526364326477 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.4469071626663208, + "learning_rate": 2.7830377058993344e-06, + "loss": 0.2288, + "step": 23577, + "teacher_loss": 0.2045501470565796 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.37797120213508606, + "learning_rate": 2.7817200353296986e-06, + "loss": 0.185, + "step": 23578, + "teacher_loss": 0.163558691740036 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.7755628824234009, + "learning_rate": 2.7804026448885926e-06, + "loss": 0.2373, + "step": 23579, + "teacher_loss": 0.17747154831886292 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.2284199297428131, + "learning_rate": 2.7790855346062135e-06, + "loss": 0.2513, + "step": 23580, + "teacher_loss": 0.2538015842437744 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.08206374943256378, + "learning_rate": 2.7777687045127644e-06, + "loss": 0.1526, + "step": 23581, + "teacher_loss": 0.16046173870563507 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.8750606775283813, + "learning_rate": 2.7764521546384366e-06, + "loss": 0.2926, + "step": 23582, + "teacher_loss": 0.22783727943897247 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.6559481620788574, + "learning_rate": 2.7751358850134106e-06, + "loss": 0.1822, + "step": 23583, + "teacher_loss": 0.1295488327741623 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.6263271570205688, + "learning_rate": 2.7738198956678663e-06, + "loss": 0.2224, + "step": 23584, + "teacher_loss": 0.17753730714321136 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.27602440118789673, + "learning_rate": 2.7725041866319788e-06, + "loss": 0.2094, + "step": 23585, + "teacher_loss": 0.20200704038143158 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.14994439482688904, + "learning_rate": 2.771188757935908e-06, + "loss": 0.1787, + "step": 23586, + "teacher_loss": 0.18185602128505707 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.5035929679870605, + "learning_rate": 2.7698736096098144e-06, + "loss": 0.4151, + "step": 23587, + "teacher_loss": 0.40522313117980957 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.601744532585144, + "learning_rate": 2.7685587416838535e-06, + "loss": 0.197, + "step": 23588, + "teacher_loss": 0.1520429253578186 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.7705152630805969, + "learning_rate": 2.767244154188167e-06, + "loss": 0.4206, + "step": 23589, + "teacher_loss": 0.38169431686401367 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.7192791700363159, + "learning_rate": 2.7659298471529005e-06, + "loss": 0.2534, + "step": 23590, + "teacher_loss": 0.20158948004245758 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.5837382674217224, + "learning_rate": 2.7646158206081777e-06, + "loss": 0.2236, + "step": 23591, + "teacher_loss": 0.183608278632164 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.22908684611320496, + "learning_rate": 2.763302074584132e-06, + "loss": 0.2104, + "step": 23592, + "teacher_loss": 0.20830616354942322 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.6608120203018188, + "learning_rate": 2.761988609110884e-06, + "loss": 0.2222, + "step": 23593, + "teacher_loss": 0.17351379990577698 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.9826804399490356, + "learning_rate": 2.7606754242185437e-06, + "loss": 0.2686, + "step": 23594, + "teacher_loss": 0.1892877072095871 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.42528021335601807, + "learning_rate": 2.7593625199372236e-06, + "loss": 0.2119, + "step": 23595, + "teacher_loss": 0.18822228908538818 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.353785902261734, + "learning_rate": 2.7580498962970187e-06, + "loss": 0.1658, + "step": 23596, + "teacher_loss": 0.14488321542739868 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.6266546845436096, + "learning_rate": 2.756737553328025e-06, + "loss": 0.2645, + "step": 23597, + "teacher_loss": 0.22423425316810608 + }, + { + "compression_loss": 0.0, + "epoch": 4.26, + "label_loss": 0.5388700366020203, + "learning_rate": 2.7554254910603364e-06, + "loss": 0.2224, + "step": 23598, + "teacher_loss": 0.18726500868797302 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.48659372329711914, + "learning_rate": 2.7541137095240263e-06, + "loss": 0.2489, + "step": 23599, + "teacher_loss": 0.22252321243286133 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.6074894666671753, + "learning_rate": 2.752802208749177e-06, + "loss": 0.1936, + "step": 23600, + "teacher_loss": 0.14758385717868805 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.6128398180007935, + "learning_rate": 2.7514909887658514e-06, + "loss": 0.1831, + "step": 23601, + "teacher_loss": 0.13529758155345917 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.5379999876022339, + "learning_rate": 2.750180049604113e-06, + "loss": 0.2137, + "step": 23602, + "teacher_loss": 0.17761120200157166 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.5924365520477295, + "learning_rate": 2.748869391294025e-06, + "loss": 0.275, + "step": 23603, + "teacher_loss": 0.2397579848766327 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.1752086728811264, + "learning_rate": 2.7475590138656266e-06, + "loss": 0.1171, + "step": 23604, + "teacher_loss": 0.11063025891780853 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.3447811007499695, + "learning_rate": 2.7462489173489636e-06, + "loss": 0.2136, + "step": 23605, + "teacher_loss": 0.19902482628822327 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.38671019673347473, + "learning_rate": 2.7449391017740806e-06, + "loss": 0.1852, + "step": 23606, + "teacher_loss": 0.16286440193653107 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.5928007960319519, + "learning_rate": 2.743629567170995e-06, + "loss": 0.2178, + "step": 23607, + "teacher_loss": 0.176174595952034 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.5205186009407043, + "learning_rate": 2.7423203135697396e-06, + "loss": 0.2643, + "step": 23608, + "teacher_loss": 0.2357889860868454 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.29240235686302185, + "learning_rate": 2.74101134100033e-06, + "loss": 0.2249, + "step": 23609, + "teacher_loss": 0.2174319624900818 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.5066640973091125, + "learning_rate": 2.739702649492778e-06, + "loss": 0.3548, + "step": 23610, + "teacher_loss": 0.3379090428352356 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.17395423352718353, + "learning_rate": 2.738394239077079e-06, + "loss": 0.1831, + "step": 23611, + "teacher_loss": 0.18415626883506775 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.15302030742168427, + "learning_rate": 2.737086109783244e-06, + "loss": 0.1991, + "step": 23612, + "teacher_loss": 0.20417320728302002 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.2503766715526581, + "learning_rate": 2.735778261641259e-06, + "loss": 0.1552, + "step": 23613, + "teacher_loss": 0.14459940791130066 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.16209635138511658, + "learning_rate": 2.734470694681104e-06, + "loss": 0.1768, + "step": 23614, + "teacher_loss": 0.17839598655700684 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.43903863430023193, + "learning_rate": 2.733163408932762e-06, + "loss": 0.2001, + "step": 23615, + "teacher_loss": 0.17356491088867188 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.6795310974121094, + "learning_rate": 2.731856404426209e-06, + "loss": 0.2194, + "step": 23616, + "teacher_loss": 0.16825395822525024 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.5856727361679077, + "learning_rate": 2.7305496811914033e-06, + "loss": 0.204, + "step": 23617, + "teacher_loss": 0.16160088777542114 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.34873586893081665, + "learning_rate": 2.7292432392583077e-06, + "loss": 0.2508, + "step": 23618, + "teacher_loss": 0.23995625972747803 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.39873868227005005, + "learning_rate": 2.7279370786568785e-06, + "loss": 0.1802, + "step": 23619, + "teacher_loss": 0.15586526691913605 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.6993827819824219, + "learning_rate": 2.726631199417055e-06, + "loss": 0.3163, + "step": 23620, + "teacher_loss": 0.2737899124622345 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.4949374794960022, + "learning_rate": 2.7253256015687818e-06, + "loss": 0.2702, + "step": 23621, + "teacher_loss": 0.24525438249111176 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.3132849335670471, + "learning_rate": 2.7240202851419944e-06, + "loss": 0.224, + "step": 23622, + "teacher_loss": 0.21411174535751343 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.49436455965042114, + "learning_rate": 2.722715250166616e-06, + "loss": 0.3305, + "step": 23623, + "teacher_loss": 0.31229573488235474 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.23616701364517212, + "learning_rate": 2.721410496672559e-06, + "loss": 0.1642, + "step": 23624, + "teacher_loss": 0.15620103478431702 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.6335381865501404, + "learning_rate": 2.720106024689755e-06, + "loss": 0.1876, + "step": 23625, + "teacher_loss": 0.13805922865867615 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.33473601937294006, + "learning_rate": 2.7188018342481025e-06, + "loss": 0.1575, + "step": 23626, + "teacher_loss": 0.1377830058336258 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.3796151876449585, + "learning_rate": 2.7174979253775e-06, + "loss": 0.2598, + "step": 23627, + "teacher_loss": 0.24652042984962463 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.29367536306381226, + "learning_rate": 2.7161942981078453e-06, + "loss": 0.1931, + "step": 23628, + "teacher_loss": 0.18192274868488312 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.2525085508823395, + "learning_rate": 2.714890952469029e-06, + "loss": 0.1905, + "step": 23629, + "teacher_loss": 0.18362337350845337 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.4037827253341675, + "learning_rate": 2.713587888490928e-06, + "loss": 0.2182, + "step": 23630, + "teacher_loss": 0.19760766625404358 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.8041902780532837, + "learning_rate": 2.7122851062034186e-06, + "loss": 0.2514, + "step": 23631, + "teacher_loss": 0.18992453813552856 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.4077351689338684, + "learning_rate": 2.710982605636377e-06, + "loss": 0.2412, + "step": 23632, + "teacher_loss": 0.222703754901886 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.41937679052352905, + "learning_rate": 2.7096803868196546e-06, + "loss": 0.2662, + "step": 23633, + "teacher_loss": 0.24914045631885529 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.3842611312866211, + "learning_rate": 2.708378449783113e-06, + "loss": 0.1823, + "step": 23634, + "teacher_loss": 0.15986892580986023 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.15760532021522522, + "learning_rate": 2.7070767945566054e-06, + "loss": 0.1607, + "step": 23635, + "teacher_loss": 0.16107967495918274 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.3653634190559387, + "learning_rate": 2.705775421169971e-06, + "loss": 0.2363, + "step": 23636, + "teacher_loss": 0.22196470201015472 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.857946515083313, + "learning_rate": 2.704474329653037e-06, + "loss": 0.3949, + "step": 23637, + "teacher_loss": 0.34349387884140015 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.22090455889701843, + "learning_rate": 2.7031735200356523e-06, + "loss": 0.1788, + "step": 23638, + "teacher_loss": 0.17416216433048248 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.3113109767436981, + "learning_rate": 2.7018729923476306e-06, + "loss": 0.1889, + "step": 23639, + "teacher_loss": 0.1753501296043396 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.537804901599884, + "learning_rate": 2.7005727466187847e-06, + "loss": 0.1807, + "step": 23640, + "teacher_loss": 0.14101248979568481 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.32615751028060913, + "learning_rate": 2.699272782878931e-06, + "loss": 0.2167, + "step": 23641, + "teacher_loss": 0.20454160869121552 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.5487974882125854, + "learning_rate": 2.697973101157877e-06, + "loss": 0.2565, + "step": 23642, + "teacher_loss": 0.2240055501461029 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.6533028483390808, + "learning_rate": 2.6966737014854114e-06, + "loss": 0.1986, + "step": 23643, + "teacher_loss": 0.14804288744926453 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.20450858771800995, + "learning_rate": 2.6953745838913314e-06, + "loss": 0.2051, + "step": 23644, + "teacher_loss": 0.20514845848083496 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.573708176612854, + "learning_rate": 2.6940757484054246e-06, + "loss": 0.2153, + "step": 23645, + "teacher_loss": 0.1755288541316986 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.23085537552833557, + "learning_rate": 2.6927771950574625e-06, + "loss": 0.1832, + "step": 23646, + "teacher_loss": 0.17785832285881042 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.6519721150398254, + "learning_rate": 2.69147892387722e-06, + "loss": 0.3159, + "step": 23647, + "teacher_loss": 0.2785448431968689 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.9092443585395813, + "learning_rate": 2.6901809348944674e-06, + "loss": 0.2889, + "step": 23648, + "teacher_loss": 0.2200058549642563 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.31982648372650146, + "learning_rate": 2.688883228138955e-06, + "loss": 0.1735, + "step": 23649, + "teacher_loss": 0.15729407966136932 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.3426324725151062, + "learning_rate": 2.6875858036404418e-06, + "loss": 0.1749, + "step": 23650, + "teacher_loss": 0.1563049554824829 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.3827723264694214, + "learning_rate": 2.6862886614286693e-06, + "loss": 0.1838, + "step": 23651, + "teacher_loss": 0.16174717247486115 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.5085392594337463, + "learning_rate": 2.6849918015333787e-06, + "loss": 0.2858, + "step": 23652, + "teacher_loss": 0.2610986828804016 + }, + { + "compression_loss": 0.0, + "epoch": 4.27, + "label_loss": 0.2684899568557739, + "learning_rate": 2.6836952239843077e-06, + "loss": 0.189, + "step": 23653, + "teacher_loss": 0.1801636815071106 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.303982675075531, + "learning_rate": 2.682398928811176e-06, + "loss": 0.1968, + "step": 23654, + "teacher_loss": 0.18491417169570923 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.3728879690170288, + "learning_rate": 2.68110291604371e-06, + "loss": 0.2079, + "step": 23655, + "teacher_loss": 0.18956203758716583 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.33532893657684326, + "learning_rate": 2.679807185711616e-06, + "loss": 0.226, + "step": 23656, + "teacher_loss": 0.2139042615890503 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.20250120759010315, + "learning_rate": 2.678511737844606e-06, + "loss": 0.1454, + "step": 23657, + "teacher_loss": 0.13900502026081085 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.3045519292354584, + "learning_rate": 2.677216572472384e-06, + "loss": 0.1534, + "step": 23658, + "teacher_loss": 0.13655096292495728 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.2404729723930359, + "learning_rate": 2.675921689624636e-06, + "loss": 0.2556, + "step": 23659, + "teacher_loss": 0.25723642110824585 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.16769632697105408, + "learning_rate": 2.6746270893310545e-06, + "loss": 0.2022, + "step": 23660, + "teacher_loss": 0.20600873231887817 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.577185869216919, + "learning_rate": 2.673332771621324e-06, + "loss": 0.1911, + "step": 23661, + "teacher_loss": 0.14820022881031036 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.547667384147644, + "learning_rate": 2.672038736525113e-06, + "loss": 0.3426, + "step": 23662, + "teacher_loss": 0.3198525309562683 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.2580743432044983, + "learning_rate": 2.670744984072094e-06, + "loss": 0.2016, + "step": 23663, + "teacher_loss": 0.19527548551559448 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.4142838716506958, + "learning_rate": 2.6694515142919258e-06, + "loss": 0.2567, + "step": 23664, + "teacher_loss": 0.23914137482643127 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.26650261878967285, + "learning_rate": 2.6681583272142653e-06, + "loss": 0.1881, + "step": 23665, + "teacher_loss": 0.17933568358421326 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.8403988480567932, + "learning_rate": 2.6668654228687657e-06, + "loss": 0.3408, + "step": 23666, + "teacher_loss": 0.28531789779663086 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.526668906211853, + "learning_rate": 2.66557280128506e-06, + "loss": 0.1951, + "step": 23667, + "teacher_loss": 0.15823514759540558 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.5754711627960205, + "learning_rate": 2.6642804624927895e-06, + "loss": 0.2397, + "step": 23668, + "teacher_loss": 0.20234721899032593 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.2596701979637146, + "learning_rate": 2.662988406521589e-06, + "loss": 0.1621, + "step": 23669, + "teacher_loss": 0.15130555629730225 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.27532607316970825, + "learning_rate": 2.6616966334010716e-06, + "loss": 0.1559, + "step": 23670, + "teacher_loss": 0.14258795976638794 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.5407602190971375, + "learning_rate": 2.6604051431608584e-06, + "loss": 0.292, + "step": 23671, + "teacher_loss": 0.2643534541130066 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.6901336908340454, + "learning_rate": 2.6591139358305623e-06, + "loss": 0.2529, + "step": 23672, + "teacher_loss": 0.2043488621711731 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.10922780632972717, + "learning_rate": 2.6578230114397845e-06, + "loss": 0.16, + "step": 23673, + "teacher_loss": 0.16559310257434845 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.3906305432319641, + "learning_rate": 2.656532370018113e-06, + "loss": 0.1816, + "step": 23674, + "teacher_loss": 0.15834026038646698 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.5325765609741211, + "learning_rate": 2.6552420115951546e-06, + "loss": 0.2156, + "step": 23675, + "teacher_loss": 0.1804354041814804 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.5849790573120117, + "learning_rate": 2.6539519362004856e-06, + "loss": 0.2685, + "step": 23676, + "teacher_loss": 0.2333393096923828 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.7238998413085938, + "learning_rate": 2.6526621438636784e-06, + "loss": 0.3253, + "step": 23677, + "teacher_loss": 0.28098270297050476 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.6053875684738159, + "learning_rate": 2.6513726346143096e-06, + "loss": 0.2313, + "step": 23678, + "teacher_loss": 0.18977703154087067 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.36253345012664795, + "learning_rate": 2.6500834084819476e-06, + "loss": 0.1852, + "step": 23679, + "teacher_loss": 0.16548995673656464 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.3586357831954956, + "learning_rate": 2.6487944654961416e-06, + "loss": 0.2256, + "step": 23680, + "teacher_loss": 0.210833340883255 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.2890626788139343, + "learning_rate": 2.647505805686448e-06, + "loss": 0.1437, + "step": 23681, + "teacher_loss": 0.12753267586231232 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 1.1699092388153076, + "learning_rate": 2.6462174290824152e-06, + "loss": 0.3426, + "step": 23682, + "teacher_loss": 0.25064951181411743 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.23057463765144348, + "learning_rate": 2.6449293357135796e-06, + "loss": 0.1693, + "step": 23683, + "teacher_loss": 0.16252447664737701 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.5053479671478271, + "learning_rate": 2.643641525609462e-06, + "loss": 0.2386, + "step": 23684, + "teacher_loss": 0.20893272757530212 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.4053344428539276, + "learning_rate": 2.6423539987996077e-06, + "loss": 0.2278, + "step": 23685, + "teacher_loss": 0.20809870958328247 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.12880100309848785, + "learning_rate": 2.6410667553135244e-06, + "loss": 0.1432, + "step": 23686, + "teacher_loss": 0.14477944374084473 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.4257130026817322, + "learning_rate": 2.63977979518072e-06, + "loss": 0.2553, + "step": 23687, + "teacher_loss": 0.23636209964752197 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.36245274543762207, + "learning_rate": 2.6384931184307133e-06, + "loss": 0.1946, + "step": 23688, + "teacher_loss": 0.1759561002254486 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.2478124499320984, + "learning_rate": 2.6372067250929984e-06, + "loss": 0.1998, + "step": 23689, + "teacher_loss": 0.19450706243515015 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.22109420597553253, + "learning_rate": 2.6359206151970634e-06, + "loss": 0.1813, + "step": 23690, + "teacher_loss": 0.17692360281944275 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.7232908010482788, + "learning_rate": 2.6346347887723997e-06, + "loss": 0.2453, + "step": 23691, + "teacher_loss": 0.19214873015880585 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.5417847633361816, + "learning_rate": 2.6333492458484908e-06, + "loss": 0.1883, + "step": 23692, + "teacher_loss": 0.14897063374519348 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.7968742847442627, + "learning_rate": 2.632063986454803e-06, + "loss": 0.2435, + "step": 23693, + "teacher_loss": 0.1820240020751953 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.6277574896812439, + "learning_rate": 2.6307790106208076e-06, + "loss": 0.2771, + "step": 23694, + "teacher_loss": 0.23808756470680237 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.13025175034999847, + "learning_rate": 2.6294943183759673e-06, + "loss": 0.1703, + "step": 23695, + "teacher_loss": 0.17476221919059753 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.1661679744720459, + "learning_rate": 2.628209909749731e-06, + "loss": 0.1862, + "step": 23696, + "teacher_loss": 0.1883726418018341 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.6914597749710083, + "learning_rate": 2.6269257847715476e-06, + "loss": 0.2839, + "step": 23697, + "teacher_loss": 0.2385876476764679 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.3294179439544678, + "learning_rate": 2.6256419434708628e-06, + "loss": 0.1925, + "step": 23698, + "teacher_loss": 0.17728745937347412 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.3500949740409851, + "learning_rate": 2.6243583858771093e-06, + "loss": 0.2113, + "step": 23699, + "teacher_loss": 0.19583293795585632 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 1.0988705158233643, + "learning_rate": 2.62307511201971e-06, + "loss": 0.2112, + "step": 23700, + "teacher_loss": 0.11261972784996033 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.523552417755127, + "learning_rate": 2.62179212192809e-06, + "loss": 0.2003, + "step": 23701, + "teacher_loss": 0.16437458992004395 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.6344937682151794, + "learning_rate": 2.620509415631669e-06, + "loss": 0.2464, + "step": 23702, + "teacher_loss": 0.20329588651657104 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.8018810749053955, + "learning_rate": 2.619226993159847e-06, + "loss": 0.2492, + "step": 23703, + "teacher_loss": 0.1877986341714859 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.24137923121452332, + "learning_rate": 2.6179448545420315e-06, + "loss": 0.129, + "step": 23704, + "teacher_loss": 0.1164843812584877 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.22313034534454346, + "learning_rate": 2.6166629998076214e-06, + "loss": 0.2637, + "step": 23705, + "teacher_loss": 0.26822540163993835 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.4842255711555481, + "learning_rate": 2.6153814289859963e-06, + "loss": 0.2132, + "step": 23706, + "teacher_loss": 0.18303081393241882 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.35062384605407715, + "learning_rate": 2.614100142106544e-06, + "loss": 0.2177, + "step": 23707, + "teacher_loss": 0.20291483402252197 + }, + { + "compression_loss": 0.0, + "epoch": 4.28, + "label_loss": 0.3233954906463623, + "learning_rate": 2.612819139198645e-06, + "loss": 0.1846, + "step": 23708, + "teacher_loss": 0.16922539472579956 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.3108372688293457, + "learning_rate": 2.611538420291662e-06, + "loss": 0.1851, + "step": 23709, + "teacher_loss": 0.17108561098575592 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.2696700096130371, + "learning_rate": 2.6102579854149582e-06, + "loss": 0.1846, + "step": 23710, + "teacher_loss": 0.17516444623470306 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.2796294093132019, + "learning_rate": 2.608977834597897e-06, + "loss": 0.1645, + "step": 23711, + "teacher_loss": 0.1516929566860199 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.19252705574035645, + "learning_rate": 2.60769796786982e-06, + "loss": 0.1332, + "step": 23712, + "teacher_loss": 0.1265571266412735 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.262068510055542, + "learning_rate": 2.60641838526008e-06, + "loss": 0.1729, + "step": 23713, + "teacher_loss": 0.16300079226493835 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.435613214969635, + "learning_rate": 2.605139086798002e-06, + "loss": 0.1881, + "step": 23714, + "teacher_loss": 0.16055706143379211 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.2595844268798828, + "learning_rate": 2.603860072512924e-06, + "loss": 0.182, + "step": 23715, + "teacher_loss": 0.17342691123485565 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.3657636046409607, + "learning_rate": 2.602581342434173e-06, + "loss": 0.2395, + "step": 23716, + "teacher_loss": 0.2254914492368698 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.5975039005279541, + "learning_rate": 2.601302896591059e-06, + "loss": 0.2866, + "step": 23717, + "teacher_loss": 0.2520139813423157 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.43074923753738403, + "learning_rate": 2.6000247350128996e-06, + "loss": 0.189, + "step": 23718, + "teacher_loss": 0.1621120423078537 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.31292399764060974, + "learning_rate": 2.5987468577289936e-06, + "loss": 0.2344, + "step": 23719, + "teacher_loss": 0.22562751173973083 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.45082762837409973, + "learning_rate": 2.5974692647686405e-06, + "loss": 0.2171, + "step": 23720, + "teacher_loss": 0.19108451902866364 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.6646035313606262, + "learning_rate": 2.596191956161135e-06, + "loss": 0.314, + "step": 23721, + "teacher_loss": 0.2750244736671448 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.5501041412353516, + "learning_rate": 2.5949149319357562e-06, + "loss": 0.2738, + "step": 23722, + "teacher_loss": 0.24308286607265472 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.34966331720352173, + "learning_rate": 2.59363819212179e-06, + "loss": 0.1685, + "step": 23723, + "teacher_loss": 0.14837335050106049 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.5191367864608765, + "learning_rate": 2.5923617367484998e-06, + "loss": 0.5544, + "step": 23724, + "teacher_loss": 0.5583438873291016 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.34102100133895874, + "learning_rate": 2.5910855658451556e-06, + "loss": 0.1879, + "step": 23725, + "teacher_loss": 0.17089171707630157 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.3432924449443817, + "learning_rate": 2.5898096794410185e-06, + "loss": 0.2075, + "step": 23726, + "teacher_loss": 0.19237539172172546 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.8545480370521545, + "learning_rate": 2.5885340775653345e-06, + "loss": 0.3668, + "step": 23727, + "teacher_loss": 0.31264132261276245 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.4476233124732971, + "learning_rate": 2.5872587602473507e-06, + "loss": 0.209, + "step": 23728, + "teacher_loss": 0.18244101107120514 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.377361923456192, + "learning_rate": 2.5859837275163147e-06, + "loss": 0.2144, + "step": 23729, + "teacher_loss": 0.19633352756500244 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.4266466498374939, + "learning_rate": 2.584708979401447e-06, + "loss": 0.1645, + "step": 23730, + "teacher_loss": 0.13542108237743378 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.3693513870239258, + "learning_rate": 2.5834345159319807e-06, + "loss": 0.1725, + "step": 23731, + "teacher_loss": 0.15059027075767517 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.519351065158844, + "learning_rate": 2.5821603371371376e-06, + "loss": 0.2482, + "step": 23732, + "teacher_loss": 0.2181241363286972 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.33885496854782104, + "learning_rate": 2.580886443046127e-06, + "loss": 0.1912, + "step": 23733, + "teacher_loss": 0.17476439476013184 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.3547120988368988, + "learning_rate": 2.5796128336881487e-06, + "loss": 0.2199, + "step": 23734, + "teacher_loss": 0.20493239164352417 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.481080025434494, + "learning_rate": 2.5783395090924155e-06, + "loss": 0.1663, + "step": 23735, + "teacher_loss": 0.13136087357997894 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.6012880802154541, + "learning_rate": 2.577066469288118e-06, + "loss": 0.2527, + "step": 23736, + "teacher_loss": 0.21392026543617249 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.588262140750885, + "learning_rate": 2.5757937143044307e-06, + "loss": 0.2237, + "step": 23737, + "teacher_loss": 0.18317997455596924 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.1728990077972412, + "learning_rate": 2.574521244170554e-06, + "loss": 0.174, + "step": 23738, + "teacher_loss": 0.17415770888328552 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.5041546821594238, + "learning_rate": 2.5732490589156495e-06, + "loss": 0.1955, + "step": 23739, + "teacher_loss": 0.1611739993095398 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.4104025363922119, + "learning_rate": 2.5719771585688835e-06, + "loss": 0.2404, + "step": 23740, + "teacher_loss": 0.22147688269615173 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.4538041353225708, + "learning_rate": 2.5707055431594193e-06, + "loss": 0.2093, + "step": 23741, + "teacher_loss": 0.18208365142345428 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.4469044804573059, + "learning_rate": 2.569434212716418e-06, + "loss": 0.2544, + "step": 23742, + "teacher_loss": 0.2330143302679062 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.42900365591049194, + "learning_rate": 2.5681631672690165e-06, + "loss": 0.1987, + "step": 23743, + "teacher_loss": 0.1731296330690384 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.45275551080703735, + "learning_rate": 2.5668924068463605e-06, + "loss": 0.2662, + "step": 23744, + "teacher_loss": 0.24547737836837769 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.6328752636909485, + "learning_rate": 2.5656219314775886e-06, + "loss": 0.3254, + "step": 23745, + "teacher_loss": 0.2912237048149109 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.38416627049446106, + "learning_rate": 2.5643517411918273e-06, + "loss": 0.2084, + "step": 23746, + "teacher_loss": 0.1888643503189087 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.31591060757637024, + "learning_rate": 2.5630818360181877e-06, + "loss": 0.1826, + "step": 23747, + "teacher_loss": 0.16783419251441956 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.1476793736219406, + "learning_rate": 2.5618122159858025e-06, + "loss": 0.1522, + "step": 23748, + "teacher_loss": 0.15265515446662903 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.2059365212917328, + "learning_rate": 2.5605428811237696e-06, + "loss": 0.1338, + "step": 23749, + "teacher_loss": 0.1258166879415512 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.7105777263641357, + "learning_rate": 2.5592738314611906e-06, + "loss": 0.3561, + "step": 23750, + "teacher_loss": 0.31666165590286255 + }, + { + "epoch": 4.29, + "eval_exact_match": 80.35950804162725, + "eval_f1": 87.73413237727307, + "step": 23750 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.6139076948165894, + "learning_rate": 2.558005067027164e-06, + "loss": 0.2031, + "step": 23751, + "teacher_loss": 0.15742570161819458 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.6187229156494141, + "learning_rate": 2.5567365878507805e-06, + "loss": 0.2342, + "step": 23752, + "teacher_loss": 0.19143350422382355 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.5888549089431763, + "learning_rate": 2.5554683939611172e-06, + "loss": 0.2146, + "step": 23753, + "teacher_loss": 0.17300280928611755 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.6130135655403137, + "learning_rate": 2.5542004853872537e-06, + "loss": 0.2386, + "step": 23754, + "teacher_loss": 0.19705167412757874 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.5799407958984375, + "learning_rate": 2.552932862158261e-06, + "loss": 0.2496, + "step": 23755, + "teacher_loss": 0.21286708116531372 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.12938010692596436, + "learning_rate": 2.5516655243031962e-06, + "loss": 0.1551, + "step": 23756, + "teacher_loss": 0.15790340304374695 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.5885113477706909, + "learning_rate": 2.5503984718511193e-06, + "loss": 0.2381, + "step": 23757, + "teacher_loss": 0.19914071261882782 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.3127386271953583, + "learning_rate": 2.5491317048310834e-06, + "loss": 0.3631, + "step": 23758, + "teacher_loss": 0.3687482178211212 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.4303865134716034, + "learning_rate": 2.5478652232721245e-06, + "loss": 0.2308, + "step": 23759, + "teacher_loss": 0.2086786925792694 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.4042850434780121, + "learning_rate": 2.546599027203282e-06, + "loss": 0.1974, + "step": 23760, + "teacher_loss": 0.1743844747543335 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.4434024691581726, + "learning_rate": 2.545333116653589e-06, + "loss": 0.1982, + "step": 23761, + "teacher_loss": 0.17091898620128632 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.45673567056655884, + "learning_rate": 2.5440674916520682e-06, + "loss": 0.2736, + "step": 23762, + "teacher_loss": 0.25323793292045593 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.4874110221862793, + "learning_rate": 2.542802152227731e-06, + "loss": 0.1773, + "step": 23763, + "teacher_loss": 0.1428537219762802 + }, + { + "compression_loss": 0.0, + "epoch": 4.29, + "label_loss": 0.5292913913726807, + "learning_rate": 2.5415370984095928e-06, + "loss": 0.2236, + "step": 23764, + "teacher_loss": 0.18958157300949097 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.4316334128379822, + "learning_rate": 2.540272330226658e-06, + "loss": 0.2063, + "step": 23765, + "teacher_loss": 0.18131625652313232 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.24590478837490082, + "learning_rate": 2.5390078477079204e-06, + "loss": 0.1999, + "step": 23766, + "teacher_loss": 0.19474168121814728 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.2540549635887146, + "learning_rate": 2.537743650882372e-06, + "loss": 0.1719, + "step": 23767, + "teacher_loss": 0.1628093123435974 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.20236222445964813, + "learning_rate": 2.536479739779003e-06, + "loss": 0.1402, + "step": 23768, + "teacher_loss": 0.1332767903804779 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.7536660432815552, + "learning_rate": 2.5352161144267798e-06, + "loss": 0.3642, + "step": 23769, + "teacher_loss": 0.32095837593078613 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.20999716222286224, + "learning_rate": 2.5339527748546814e-06, + "loss": 0.1621, + "step": 23770, + "teacher_loss": 0.1567956805229187 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.5529207587242126, + "learning_rate": 2.5326897210916746e-06, + "loss": 0.2539, + "step": 23771, + "teacher_loss": 0.22064395248889923 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.21606501936912537, + "learning_rate": 2.5314269531667108e-06, + "loss": 0.145, + "step": 23772, + "teacher_loss": 0.1371353566646576 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.30948394536972046, + "learning_rate": 2.530164471108745e-06, + "loss": 0.2303, + "step": 23773, + "teacher_loss": 0.22154507040977478 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.7455642819404602, + "learning_rate": 2.52890227494672e-06, + "loss": 0.323, + "step": 23774, + "teacher_loss": 0.27603402733802795 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.5764243602752686, + "learning_rate": 2.5276403647095752e-06, + "loss": 0.2465, + "step": 23775, + "teacher_loss": 0.20983752608299255 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.2964906692504883, + "learning_rate": 2.526378740426246e-06, + "loss": 0.2291, + "step": 23776, + "teacher_loss": 0.22162553668022156 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.3521164059638977, + "learning_rate": 2.5251174021256514e-06, + "loss": 0.2235, + "step": 23777, + "teacher_loss": 0.20921283960342407 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.6573386192321777, + "learning_rate": 2.5238563498367123e-06, + "loss": 0.3724, + "step": 23778, + "teacher_loss": 0.3407594561576843 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.260146826505661, + "learning_rate": 2.5225955835883465e-06, + "loss": 0.1641, + "step": 23779, + "teacher_loss": 0.15339714288711548 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.21703004837036133, + "learning_rate": 2.52133510340945e-06, + "loss": 0.1598, + "step": 23780, + "teacher_loss": 0.1534246951341629 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.4301258325576782, + "learning_rate": 2.5200749093289306e-06, + "loss": 0.2057, + "step": 23781, + "teacher_loss": 0.18078739941120148 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.19837181270122528, + "learning_rate": 2.5188150013756727e-06, + "loss": 0.1856, + "step": 23782, + "teacher_loss": 0.18417882919311523 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.2406182587146759, + "learning_rate": 2.5175553795785657e-06, + "loss": 0.1599, + "step": 23783, + "teacher_loss": 0.15089094638824463 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.7943019270896912, + "learning_rate": 2.516296043966494e-06, + "loss": 0.2647, + "step": 23784, + "teacher_loss": 0.20584538578987122 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.2987392544746399, + "learning_rate": 2.5150369945683233e-06, + "loss": 0.1882, + "step": 23785, + "teacher_loss": 0.17588341236114502 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.3883625864982605, + "learning_rate": 2.5137782314129255e-06, + "loss": 0.2015, + "step": 23786, + "teacher_loss": 0.18076877295970917 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.45821377635002136, + "learning_rate": 2.5125197545291524e-06, + "loss": 0.1958, + "step": 23787, + "teacher_loss": 0.16664782166481018 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.5024231672286987, + "learning_rate": 2.5112615639458626e-06, + "loss": 0.2038, + "step": 23788, + "teacher_loss": 0.17067420482635498 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.673396110534668, + "learning_rate": 2.5100036596919064e-06, + "loss": 0.2418, + "step": 23789, + "teacher_loss": 0.1938217133283615 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.2702580690383911, + "learning_rate": 2.5087460417961155e-06, + "loss": 0.2336, + "step": 23790, + "teacher_loss": 0.22954952716827393 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.5539103746414185, + "learning_rate": 2.5074887102873255e-06, + "loss": 0.191, + "step": 23791, + "teacher_loss": 0.1506558507680893 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.8834432363510132, + "learning_rate": 2.50623166519437e-06, + "loss": 0.2313, + "step": 23792, + "teacher_loss": 0.15885570645332336 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.25771063566207886, + "learning_rate": 2.504974906546061e-06, + "loss": 0.206, + "step": 23793, + "teacher_loss": 0.20021876692771912 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.5256583094596863, + "learning_rate": 2.5037184343712147e-06, + "loss": 0.2653, + "step": 23794, + "teacher_loss": 0.23638774454593658 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.4426930546760559, + "learning_rate": 2.5024622486986425e-06, + "loss": 0.1829, + "step": 23795, + "teacher_loss": 0.1540175974369049 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.41718316078186035, + "learning_rate": 2.501206349557141e-06, + "loss": 0.2059, + "step": 23796, + "teacher_loss": 0.18241557478904724 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.5743406414985657, + "learning_rate": 2.499950736975498e-06, + "loss": 0.2322, + "step": 23797, + "teacher_loss": 0.1941579282283783 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.4761444926261902, + "learning_rate": 2.498695410982515e-06, + "loss": 0.2226, + "step": 23798, + "teacher_loss": 0.19440913200378418 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.18100130558013916, + "learning_rate": 2.4974403716069648e-06, + "loss": 0.1724, + "step": 23799, + "teacher_loss": 0.17143908143043518 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.15965360403060913, + "learning_rate": 2.4961856188776185e-06, + "loss": 0.1512, + "step": 23800, + "teacher_loss": 0.15023568272590637 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.1399000734090805, + "learning_rate": 2.494931152823247e-06, + "loss": 0.1403, + "step": 23801, + "teacher_loss": 0.14036494493484497 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.17357763648033142, + "learning_rate": 2.4936769734726166e-06, + "loss": 0.1539, + "step": 23802, + "teacher_loss": 0.15173056721687317 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.1769634187221527, + "learning_rate": 2.492423080854474e-06, + "loss": 0.1605, + "step": 23803, + "teacher_loss": 0.1586201786994934 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.43957751989364624, + "learning_rate": 2.4911694749975693e-06, + "loss": 0.2639, + "step": 23804, + "teacher_loss": 0.24432754516601562 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.6758774518966675, + "learning_rate": 2.48991615593065e-06, + "loss": 0.2416, + "step": 23805, + "teacher_loss": 0.1933761090040207 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.24798092246055603, + "learning_rate": 2.4886631236824457e-06, + "loss": 0.1635, + "step": 23806, + "teacher_loss": 0.15416103601455688 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.8544960021972656, + "learning_rate": 2.4874103782816777e-06, + "loss": 0.3367, + "step": 23807, + "teacher_loss": 0.2792031764984131 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.4296036958694458, + "learning_rate": 2.4861579197570805e-06, + "loss": 0.263, + "step": 23808, + "teacher_loss": 0.24443665146827698 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.5193527936935425, + "learning_rate": 2.4849057481373665e-06, + "loss": 0.2197, + "step": 23809, + "teacher_loss": 0.18639755249023438 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.5704477429389954, + "learning_rate": 2.4836538634512327e-06, + "loss": 0.2567, + "step": 23810, + "teacher_loss": 0.22183957695960999 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.6073932647705078, + "learning_rate": 2.4824022657273982e-06, + "loss": 0.223, + "step": 23811, + "teacher_loss": 0.18032227456569672 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.5477582216262817, + "learning_rate": 2.4811509549945495e-06, + "loss": 0.3636, + "step": 23812, + "teacher_loss": 0.3431586027145386 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.5420389175415039, + "learning_rate": 2.479899931281372e-06, + "loss": 0.2945, + "step": 23813, + "teacher_loss": 0.2669578790664673 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.6106827855110168, + "learning_rate": 2.4786491946165513e-06, + "loss": 0.2727, + "step": 23814, + "teacher_loss": 0.23517176508903503 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.6508402824401855, + "learning_rate": 2.4773987450287676e-06, + "loss": 0.3378, + "step": 23815, + "teacher_loss": 0.3030053377151489 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.26316964626312256, + "learning_rate": 2.4761485825466833e-06, + "loss": 0.2458, + "step": 23816, + "teacher_loss": 0.24381719529628754 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.20813339948654175, + "learning_rate": 2.4748987071989637e-06, + "loss": 0.1682, + "step": 23817, + "teacher_loss": 0.16370925307273865 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.747562050819397, + "learning_rate": 2.4736491190142673e-06, + "loss": 0.4468, + "step": 23818, + "teacher_loss": 0.4133702516555786 + }, + { + "compression_loss": 0.0, + "epoch": 4.3, + "label_loss": 0.2551780939102173, + "learning_rate": 2.4723998180212366e-06, + "loss": 0.1741, + "step": 23819, + "teacher_loss": 0.16505509614944458 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.2800525426864624, + "learning_rate": 2.47115080424852e-06, + "loss": 0.1634, + "step": 23820, + "teacher_loss": 0.15049193799495697 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.4618319272994995, + "learning_rate": 2.4699020777247545e-06, + "loss": 0.3396, + "step": 23821, + "teacher_loss": 0.32604295015335083 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.46803539991378784, + "learning_rate": 2.468653638478567e-06, + "loss": 0.2079, + "step": 23822, + "teacher_loss": 0.17895999550819397 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.568474531173706, + "learning_rate": 2.4674054865385775e-06, + "loss": 0.2291, + "step": 23823, + "teacher_loss": 0.19134452939033508 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.10933741182088852, + "learning_rate": 2.4661576219334054e-06, + "loss": 0.1574, + "step": 23824, + "teacher_loss": 0.16276784241199493 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.1125771701335907, + "learning_rate": 2.4649100446916646e-06, + "loss": 0.1662, + "step": 23825, + "teacher_loss": 0.17211046814918518 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.5954536199569702, + "learning_rate": 2.4636627548419486e-06, + "loss": 0.2485, + "step": 23826, + "teacher_loss": 0.2099829912185669 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.42176496982574463, + "learning_rate": 2.462415752412862e-06, + "loss": 0.2178, + "step": 23827, + "teacher_loss": 0.19509148597717285 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.21014374494552612, + "learning_rate": 2.4611690374329965e-06, + "loss": 0.2016, + "step": 23828, + "teacher_loss": 0.20066046714782715 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.34823089838027954, + "learning_rate": 2.4599226099309274e-06, + "loss": 0.1566, + "step": 23829, + "teacher_loss": 0.13529442250728607 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.5129961371421814, + "learning_rate": 2.4586764699352353e-06, + "loss": 0.2196, + "step": 23830, + "teacher_loss": 0.18697024881839752 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.9168434739112854, + "learning_rate": 2.4574306174744943e-06, + "loss": 0.3457, + "step": 23831, + "teacher_loss": 0.2822802662849426 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.3887714743614197, + "learning_rate": 2.4561850525772613e-06, + "loss": 0.2082, + "step": 23832, + "teacher_loss": 0.1880871057510376 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.2686925530433655, + "learning_rate": 2.4549397752720966e-06, + "loss": 0.2274, + "step": 23833, + "teacher_loss": 0.22280171513557434 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.10843300074338913, + "learning_rate": 2.4536947855875558e-06, + "loss": 0.1213, + "step": 23834, + "teacher_loss": 0.12276796251535416 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.4365948438644409, + "learning_rate": 2.452450083552173e-06, + "loss": 0.2118, + "step": 23835, + "teacher_loss": 0.18684130907058716 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.6035574674606323, + "learning_rate": 2.4512056691944932e-06, + "loss": 0.2798, + "step": 23836, + "teacher_loss": 0.2438754439353943 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.19577571749687195, + "learning_rate": 2.449961542543041e-06, + "loss": 0.206, + "step": 23837, + "teacher_loss": 0.20716840028762817 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.4716640114784241, + "learning_rate": 2.4487177036263448e-06, + "loss": 0.1613, + "step": 23838, + "teacher_loss": 0.12685352563858032 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.3927239775657654, + "learning_rate": 2.447474152472923e-06, + "loss": 0.1667, + "step": 23839, + "teacher_loss": 0.14162956178188324 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.6211049556732178, + "learning_rate": 2.446230889111282e-06, + "loss": 0.1933, + "step": 23840, + "teacher_loss": 0.14578421413898468 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.2738092243671417, + "learning_rate": 2.4449879135699286e-06, + "loss": 0.1901, + "step": 23841, + "teacher_loss": 0.18084776401519775 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.49023595452308655, + "learning_rate": 2.4437452258773635e-06, + "loss": 0.2198, + "step": 23842, + "teacher_loss": 0.18974299728870392 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.45602354407310486, + "learning_rate": 2.442502826062072e-06, + "loss": 0.1889, + "step": 23843, + "teacher_loss": 0.15919554233551025 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.8991382718086243, + "learning_rate": 2.441260714152543e-06, + "loss": 0.2489, + "step": 23844, + "teacher_loss": 0.17661425471305847 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.36079782247543335, + "learning_rate": 2.44001889017725e-06, + "loss": 0.2013, + "step": 23845, + "teacher_loss": 0.18355578184127808 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.47617724537849426, + "learning_rate": 2.4387773541646713e-06, + "loss": 0.2382, + "step": 23846, + "teacher_loss": 0.21178226172924042 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.47051021456718445, + "learning_rate": 2.437536106143265e-06, + "loss": 0.2486, + "step": 23847, + "teacher_loss": 0.22398701310157776 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.4085085690021515, + "learning_rate": 2.43629514614149e-06, + "loss": 0.213, + "step": 23848, + "teacher_loss": 0.19133064150810242 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.3996298313140869, + "learning_rate": 2.435054474187802e-06, + "loss": 0.174, + "step": 23849, + "teacher_loss": 0.1488867998123169 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.13941237330436707, + "learning_rate": 2.433814090310642e-06, + "loss": 0.1385, + "step": 23850, + "teacher_loss": 0.13840574026107788 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.4587949514389038, + "learning_rate": 2.4325739945384484e-06, + "loss": 0.1645, + "step": 23851, + "teacher_loss": 0.13180112838745117 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.2695939838886261, + "learning_rate": 2.431334186899657e-06, + "loss": 0.1906, + "step": 23852, + "teacher_loss": 0.18180054426193237 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.5554072856903076, + "learning_rate": 2.4300946674226864e-06, + "loss": 0.3261, + "step": 23853, + "teacher_loss": 0.30063754320144653 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.4812760353088379, + "learning_rate": 2.4288554361359592e-06, + "loss": 0.2257, + "step": 23854, + "teacher_loss": 0.19732257723808289 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.3424815535545349, + "learning_rate": 2.4276164930678895e-06, + "loss": 0.1917, + "step": 23855, + "teacher_loss": 0.174946129322052 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.38485413789749146, + "learning_rate": 2.426377838246881e-06, + "loss": 0.2172, + "step": 23856, + "teacher_loss": 0.19852669537067413 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.2820977568626404, + "learning_rate": 2.4251394717013225e-06, + "loss": 0.1675, + "step": 23857, + "teacher_loss": 0.15473651885986328 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.33201009035110474, + "learning_rate": 2.4239013934596217e-06, + "loss": 0.1816, + "step": 23858, + "teacher_loss": 0.16483500599861145 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.4268755614757538, + "learning_rate": 2.422663603550157e-06, + "loss": 0.179, + "step": 23859, + "teacher_loss": 0.15141290426254272 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.45509421825408936, + "learning_rate": 2.4214261020013013e-06, + "loss": 0.2218, + "step": 23860, + "teacher_loss": 0.1958651840686798 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.760026216506958, + "learning_rate": 2.420188888841438e-06, + "loss": 0.3222, + "step": 23861, + "teacher_loss": 0.2735786437988281 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.24350839853286743, + "learning_rate": 2.4189519640989282e-06, + "loss": 0.1629, + "step": 23862, + "teacher_loss": 0.15390709042549133 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.6660348176956177, + "learning_rate": 2.4177153278021258e-06, + "loss": 0.2341, + "step": 23863, + "teacher_loss": 0.1861265003681183 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.23576444387435913, + "learning_rate": 2.416478979979388e-06, + "loss": 0.1866, + "step": 23864, + "teacher_loss": 0.18108394742012024 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.41180574893951416, + "learning_rate": 2.415242920659062e-06, + "loss": 0.1702, + "step": 23865, + "teacher_loss": 0.14333011209964752 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.44124647974967957, + "learning_rate": 2.414007149869482e-06, + "loss": 0.2053, + "step": 23866, + "teacher_loss": 0.17912161350250244 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.469724178314209, + "learning_rate": 2.4127716676389837e-06, + "loss": 0.4436, + "step": 23867, + "teacher_loss": 0.44067060947418213 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.8521972894668579, + "learning_rate": 2.411536473995896e-06, + "loss": 0.254, + "step": 23868, + "teacher_loss": 0.18750128149986267 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.4770563840866089, + "learning_rate": 2.410301568968535e-06, + "loss": 0.2269, + "step": 23869, + "teacher_loss": 0.1990727186203003 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.2516653537750244, + "learning_rate": 2.4090669525852057e-06, + "loss": 0.1767, + "step": 23870, + "teacher_loss": 0.1684255599975586 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.33466997742652893, + "learning_rate": 2.407832624874227e-06, + "loss": 0.1801, + "step": 23871, + "teacher_loss": 0.16287216544151306 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.48133599758148193, + "learning_rate": 2.4065985858638955e-06, + "loss": 0.2529, + "step": 23872, + "teacher_loss": 0.22756756842136383 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.3731071352958679, + "learning_rate": 2.4053648355824964e-06, + "loss": 0.216, + "step": 23873, + "teacher_loss": 0.1985270380973816 + }, + { + "compression_loss": 0.0, + "epoch": 4.31, + "label_loss": 0.31552666425704956, + "learning_rate": 2.404131374058322e-06, + "loss": 0.1858, + "step": 23874, + "teacher_loss": 0.17139732837677002 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.9457638263702393, + "learning_rate": 2.4028982013196533e-06, + "loss": 0.2622, + "step": 23875, + "teacher_loss": 0.18627455830574036 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.4866615831851959, + "learning_rate": 2.401665317394759e-06, + "loss": 0.2059, + "step": 23876, + "teacher_loss": 0.17465436458587646 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.3103378415107727, + "learning_rate": 2.400432722311905e-06, + "loss": 0.1828, + "step": 23877, + "teacher_loss": 0.16863971948623657 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.37522751092910767, + "learning_rate": 2.3992004160993587e-06, + "loss": 0.1762, + "step": 23878, + "teacher_loss": 0.15412789583206177 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.2887871265411377, + "learning_rate": 2.3979683987853623e-06, + "loss": 0.1717, + "step": 23879, + "teacher_loss": 0.15871457755565643 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.7413889169692993, + "learning_rate": 2.3967366703981687e-06, + "loss": 0.2301, + "step": 23880, + "teacher_loss": 0.17330613732337952 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.24267703294754028, + "learning_rate": 2.3955052309660215e-06, + "loss": 0.1614, + "step": 23881, + "teacher_loss": 0.15238264203071594 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.42451900243759155, + "learning_rate": 2.394274080517143e-06, + "loss": 0.2171, + "step": 23882, + "teacher_loss": 0.19400498270988464 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.33517399430274963, + "learning_rate": 2.393043219079768e-06, + "loss": 0.2242, + "step": 23883, + "teacher_loss": 0.2118556797504425 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.4546704888343811, + "learning_rate": 2.3918126466821165e-06, + "loss": 0.2494, + "step": 23884, + "teacher_loss": 0.22662681341171265 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.5795440673828125, + "learning_rate": 2.3905823633523997e-06, + "loss": 0.2605, + "step": 23885, + "teacher_loss": 0.2250637710094452 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.6300973296165466, + "learning_rate": 2.3893523691188216e-06, + "loss": 0.3525, + "step": 23886, + "teacher_loss": 0.32168614864349365 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.19654031097888947, + "learning_rate": 2.3881226640095847e-06, + "loss": 0.1843, + "step": 23887, + "teacher_loss": 0.18289095163345337 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.37306350469589233, + "learning_rate": 2.3868932480528867e-06, + "loss": 0.181, + "step": 23888, + "teacher_loss": 0.159670889377594 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.9259461164474487, + "learning_rate": 2.3856641212769057e-06, + "loss": 0.2978, + "step": 23889, + "teacher_loss": 0.2279871553182602 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.16115117073059082, + "learning_rate": 2.3844352837098286e-06, + "loss": 0.1227, + "step": 23890, + "teacher_loss": 0.11840471625328064 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.29567861557006836, + "learning_rate": 2.3832067353798294e-06, + "loss": 0.21, + "step": 23891, + "teacher_loss": 0.2004910707473755 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.1721012145280838, + "learning_rate": 2.381978476315068e-06, + "loss": 0.142, + "step": 23892, + "teacher_loss": 0.1386529803276062 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.6027420163154602, + "learning_rate": 2.3807505065437113e-06, + "loss": 0.2101, + "step": 23893, + "teacher_loss": 0.1664503663778305 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.27796274423599243, + "learning_rate": 2.3795228260939146e-06, + "loss": 0.1618, + "step": 23894, + "teacher_loss": 0.14889875054359436 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.18522487580776215, + "learning_rate": 2.378295434993817e-06, + "loss": 0.2036, + "step": 23895, + "teacher_loss": 0.20562222599983215 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.43860286474227905, + "learning_rate": 2.377068333271566e-06, + "loss": 0.2588, + "step": 23896, + "teacher_loss": 0.23887313902378082 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.3722781240940094, + "learning_rate": 2.3758415209552893e-06, + "loss": 0.1865, + "step": 23897, + "teacher_loss": 0.16590192914009094 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.35110148787498474, + "learning_rate": 2.374614998073119e-06, + "loss": 0.1886, + "step": 23898, + "teacher_loss": 0.17056719958782196 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.6511486172676086, + "learning_rate": 2.3733887646531767e-06, + "loss": 0.2218, + "step": 23899, + "teacher_loss": 0.17406833171844482 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.31316378712654114, + "learning_rate": 2.372162820723569e-06, + "loss": 0.2009, + "step": 23900, + "teacher_loss": 0.18847988545894623 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.3417724072933197, + "learning_rate": 2.3709371663124073e-06, + "loss": 0.1842, + "step": 23901, + "teacher_loss": 0.16670268774032593 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.47425639629364014, + "learning_rate": 2.3697118014477974e-06, + "loss": 0.1739, + "step": 23902, + "teacher_loss": 0.14055943489074707 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.5473437309265137, + "learning_rate": 2.3684867261578236e-06, + "loss": 0.2306, + "step": 23903, + "teacher_loss": 0.1954355239868164 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.3548859655857086, + "learning_rate": 2.36726194047058e-06, + "loss": 0.179, + "step": 23904, + "teacher_loss": 0.1594819873571396 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.5557812452316284, + "learning_rate": 2.3660374444141468e-06, + "loss": 0.246, + "step": 23905, + "teacher_loss": 0.2116013467311859 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.3208061456680298, + "learning_rate": 2.3648132380165927e-06, + "loss": 0.217, + "step": 23906, + "teacher_loss": 0.2054111659526825 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.10957368463277817, + "learning_rate": 2.363589321305993e-06, + "loss": 0.2082, + "step": 23907, + "teacher_loss": 0.21916519105434418 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.4408269226551056, + "learning_rate": 2.3623656943104015e-06, + "loss": 0.2311, + "step": 23908, + "teacher_loss": 0.20784947276115417 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.38090503215789795, + "learning_rate": 2.361142357057878e-06, + "loss": 0.2389, + "step": 23909, + "teacher_loss": 0.22308112680912018 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.26425451040267944, + "learning_rate": 2.359919309576462e-06, + "loss": 0.1898, + "step": 23910, + "teacher_loss": 0.18153586983680725 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.41465017199516296, + "learning_rate": 2.3586965518942024e-06, + "loss": 0.2, + "step": 23911, + "teacher_loss": 0.1762000024318695 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.42134973406791687, + "learning_rate": 2.357474084039131e-06, + "loss": 0.2102, + "step": 23912, + "teacher_loss": 0.1867617517709732 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.637391209602356, + "learning_rate": 2.356251906039272e-06, + "loss": 0.2311, + "step": 23913, + "teacher_loss": 0.18595662713050842 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.2097444087266922, + "learning_rate": 2.3550300179226495e-06, + "loss": 0.2016, + "step": 23914, + "teacher_loss": 0.20068207383155823 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.2666570544242859, + "learning_rate": 2.3538084197172793e-06, + "loss": 0.1414, + "step": 23915, + "teacher_loss": 0.12745575606822968 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.5167502760887146, + "learning_rate": 2.352587111451165e-06, + "loss": 0.2202, + "step": 23916, + "teacher_loss": 0.18729762732982635 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.2968178987503052, + "learning_rate": 2.3513660931523073e-06, + "loss": 0.1681, + "step": 23917, + "teacher_loss": 0.15382464230060577 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.5659321546554565, + "learning_rate": 2.3501453648487075e-06, + "loss": 0.2714, + "step": 23918, + "teacher_loss": 0.23864081501960754 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.32961222529411316, + "learning_rate": 2.348924926568348e-06, + "loss": 0.18, + "step": 23919, + "teacher_loss": 0.16337840259075165 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.5843708515167236, + "learning_rate": 2.347704778339202e-06, + "loss": 0.2299, + "step": 23920, + "teacher_loss": 0.19050216674804688 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.190871000289917, + "learning_rate": 2.3464849201892596e-06, + "loss": 0.2396, + "step": 23921, + "teacher_loss": 0.24503040313720703 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.5453879237174988, + "learning_rate": 2.3452653521464824e-06, + "loss": 0.2554, + "step": 23922, + "teacher_loss": 0.22319284081459045 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.633758008480072, + "learning_rate": 2.3440460742388243e-06, + "loss": 0.2428, + "step": 23923, + "teacher_loss": 0.19933396577835083 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.9109911918640137, + "learning_rate": 2.3428270864942446e-06, + "loss": 0.296, + "step": 23924, + "teacher_loss": 0.22764885425567627 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.2388249635696411, + "learning_rate": 2.3416083889406963e-06, + "loss": 0.1824, + "step": 23925, + "teacher_loss": 0.176140695810318 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.333368182182312, + "learning_rate": 2.3403899816061135e-06, + "loss": 0.2113, + "step": 23926, + "teacher_loss": 0.19775158166885376 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.3298936188220978, + "learning_rate": 2.3391718645184307e-06, + "loss": 0.2278, + "step": 23927, + "teacher_loss": 0.21647566556930542 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.6233240365982056, + "learning_rate": 2.3379540377055834e-06, + "loss": 0.1815, + "step": 23928, + "teacher_loss": 0.13236692547798157 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.9931387901306152, + "learning_rate": 2.3367365011954865e-06, + "loss": 0.2743, + "step": 23929, + "teacher_loss": 0.1943785548210144 + }, + { + "compression_loss": 0.0, + "epoch": 4.32, + "label_loss": 0.30954569578170776, + "learning_rate": 2.3355192550160475e-06, + "loss": 0.1665, + "step": 23930, + "teacher_loss": 0.15056119859218597 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.20499663054943085, + "learning_rate": 2.334302299195189e-06, + "loss": 0.1465, + "step": 23931, + "teacher_loss": 0.13994693756103516 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.34827184677124023, + "learning_rate": 2.3330856337608054e-06, + "loss": 0.3122, + "step": 23932, + "teacher_loss": 0.30821114778518677 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.5599461793899536, + "learning_rate": 2.3318692587407842e-06, + "loss": 0.1621, + "step": 23933, + "teacher_loss": 0.11793524026870728 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.48645541071891785, + "learning_rate": 2.3306531741630265e-06, + "loss": 0.2693, + "step": 23934, + "teacher_loss": 0.2451891303062439 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.612858235836029, + "learning_rate": 2.329437380055407e-06, + "loss": 0.2005, + "step": 23935, + "teacher_loss": 0.15466400980949402 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.7014422416687012, + "learning_rate": 2.328221876445796e-06, + "loss": 0.2589, + "step": 23936, + "teacher_loss": 0.2097308486700058 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.8429751396179199, + "learning_rate": 2.3270066633620656e-06, + "loss": 0.2519, + "step": 23937, + "teacher_loss": 0.1862192451953888 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.514324963092804, + "learning_rate": 2.325791740832081e-06, + "loss": 0.2607, + "step": 23938, + "teacher_loss": 0.23248812556266785 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.5227470993995667, + "learning_rate": 2.324577108883689e-06, + "loss": 0.234, + "step": 23939, + "teacher_loss": 0.20190829038619995 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.6845039129257202, + "learning_rate": 2.3233627675447394e-06, + "loss": 0.2624, + "step": 23940, + "teacher_loss": 0.2155291736125946 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.21631315350532532, + "learning_rate": 2.322148716843081e-06, + "loss": 0.1552, + "step": 23941, + "teacher_loss": 0.14838644862174988 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.31922048330307007, + "learning_rate": 2.320934956806536e-06, + "loss": 0.1448, + "step": 23942, + "teacher_loss": 0.1253708451986313 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.6853855848312378, + "learning_rate": 2.3197214874629413e-06, + "loss": 0.2634, + "step": 23943, + "teacher_loss": 0.21648722887039185 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.18802130222320557, + "learning_rate": 2.318508308840117e-06, + "loss": 0.1938, + "step": 23944, + "teacher_loss": 0.19443315267562866 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.3451688289642334, + "learning_rate": 2.3172954209658748e-06, + "loss": 0.2163, + "step": 23945, + "teacher_loss": 0.20197612047195435 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.6660696268081665, + "learning_rate": 2.3160828238680256e-06, + "loss": 0.2442, + "step": 23946, + "teacher_loss": 0.1973493993282318 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.5614475011825562, + "learning_rate": 2.314870517574365e-06, + "loss": 0.216, + "step": 23947, + "teacher_loss": 0.1776343584060669 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.3613615036010742, + "learning_rate": 2.3136585021126965e-06, + "loss": 0.2308, + "step": 23948, + "teacher_loss": 0.21627968549728394 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.5383799076080322, + "learning_rate": 2.3124467775107975e-06, + "loss": 0.2569, + "step": 23949, + "teacher_loss": 0.22563251852989197 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.9112308025360107, + "learning_rate": 2.311235343796456e-06, + "loss": 0.3528, + "step": 23950, + "teacher_loss": 0.29072660207748413 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.47813916206359863, + "learning_rate": 2.31002420099745e-06, + "loss": 0.1961, + "step": 23951, + "teacher_loss": 0.16476452350616455 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.5244801640510559, + "learning_rate": 2.3088133491415365e-06, + "loss": 0.2348, + "step": 23952, + "teacher_loss": 0.20258218050003052 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.2677934169769287, + "learning_rate": 2.307602788256484e-06, + "loss": 0.2032, + "step": 23953, + "teacher_loss": 0.19604431092739105 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.2846108675003052, + "learning_rate": 2.3063925183700494e-06, + "loss": 0.2086, + "step": 23954, + "teacher_loss": 0.20016473531723022 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.46850287914276123, + "learning_rate": 2.3051825395099734e-06, + "loss": 0.2324, + "step": 23955, + "teacher_loss": 0.20621876418590546 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.3939194679260254, + "learning_rate": 2.3039728517039998e-06, + "loss": 0.2034, + "step": 23956, + "teacher_loss": 0.1822763979434967 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.40213149785995483, + "learning_rate": 2.302763454979868e-06, + "loss": 0.2674, + "step": 23957, + "teacher_loss": 0.2524672746658325 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.3609432578086853, + "learning_rate": 2.301554349365298e-06, + "loss": 0.2937, + "step": 23958, + "teacher_loss": 0.2861797511577606 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.5048679113388062, + "learning_rate": 2.3003455348880205e-06, + "loss": 0.2381, + "step": 23959, + "teacher_loss": 0.2084188163280487 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.28921329975128174, + "learning_rate": 2.2991370115757383e-06, + "loss": 0.1617, + "step": 23960, + "teacher_loss": 0.14748495817184448 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.6287018060684204, + "learning_rate": 2.297928779456166e-06, + "loss": 0.2208, + "step": 23961, + "teacher_loss": 0.17549774050712585 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.5425493717193604, + "learning_rate": 2.2967208385570084e-06, + "loss": 0.2184, + "step": 23962, + "teacher_loss": 0.18235903978347778 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.11756950616836548, + "learning_rate": 2.295513188905951e-06, + "loss": 0.1751, + "step": 23963, + "teacher_loss": 0.18150639533996582 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.6002629399299622, + "learning_rate": 2.2943058305306856e-06, + "loss": 0.2558, + "step": 23964, + "teacher_loss": 0.2175242304801941 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.24027487635612488, + "learning_rate": 2.2930987634588983e-06, + "loss": 0.1978, + "step": 23965, + "teacher_loss": 0.1931256204843521 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.3135644793510437, + "learning_rate": 2.291891987718257e-06, + "loss": 0.2693, + "step": 23966, + "teacher_loss": 0.2643556594848633 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.19681869447231293, + "learning_rate": 2.290685503336429e-06, + "loss": 0.1835, + "step": 23967, + "teacher_loss": 0.18202456831932068 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.38821619749069214, + "learning_rate": 2.289479310341083e-06, + "loss": 0.2654, + "step": 23968, + "teacher_loss": 0.2517406940460205 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.2564573585987091, + "learning_rate": 2.2882734087598685e-06, + "loss": 0.2019, + "step": 23969, + "teacher_loss": 0.19582590460777283 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.14615774154663086, + "learning_rate": 2.287067798620429e-06, + "loss": 0.1553, + "step": 23970, + "teacher_loss": 0.15634512901306152 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.32785242795944214, + "learning_rate": 2.2858624799504086e-06, + "loss": 0.1947, + "step": 23971, + "teacher_loss": 0.17985573410987854 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.20455320179462433, + "learning_rate": 2.2846574527774476e-06, + "loss": 0.1582, + "step": 23972, + "teacher_loss": 0.1530318558216095 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.5766096115112305, + "learning_rate": 2.283452717129163e-06, + "loss": 0.2153, + "step": 23973, + "teacher_loss": 0.17520524561405182 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.7136630415916443, + "learning_rate": 2.282248273033184e-06, + "loss": 0.2746, + "step": 23974, + "teacher_loss": 0.22585183382034302 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 1.1593677997589111, + "learning_rate": 2.2810441205171236e-06, + "loss": 0.3139, + "step": 23975, + "teacher_loss": 0.21996553242206573 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.5279066562652588, + "learning_rate": 2.279840259608586e-06, + "loss": 0.1853, + "step": 23976, + "teacher_loss": 0.14726190268993378 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.5876827239990234, + "learning_rate": 2.2786366903351747e-06, + "loss": 0.1734, + "step": 23977, + "teacher_loss": 0.1273704171180725 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.383453369140625, + "learning_rate": 2.2774334127244856e-06, + "loss": 0.2264, + "step": 23978, + "teacher_loss": 0.20899195969104767 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.26491016149520874, + "learning_rate": 2.2762304268041053e-06, + "loss": 0.2031, + "step": 23979, + "teacher_loss": 0.19623857736587524 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.20869015157222748, + "learning_rate": 2.2750277326016043e-06, + "loss": 0.1346, + "step": 23980, + "teacher_loss": 0.12638501822948456 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.3733154833316803, + "learning_rate": 2.273825330144575e-06, + "loss": 0.2116, + "step": 23981, + "teacher_loss": 0.1936642825603485 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.13564841449260712, + "learning_rate": 2.2726232194605747e-06, + "loss": 0.1347, + "step": 23982, + "teacher_loss": 0.13454213738441467 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.6553156971931458, + "learning_rate": 2.2714214005771595e-06, + "loss": 0.185, + "step": 23983, + "teacher_loss": 0.1326877623796463 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.41339778900146484, + "learning_rate": 2.270219873521896e-06, + "loss": 0.2617, + "step": 23984, + "teacher_loss": 0.24485361576080322 + }, + { + "compression_loss": 0.0, + "epoch": 4.33, + "label_loss": 0.28764328360557556, + "learning_rate": 2.269018638322324e-06, + "loss": 0.1743, + "step": 23985, + "teacher_loss": 0.16166174411773682 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.33214449882507324, + "learning_rate": 2.2678176950059826e-06, + "loss": 0.2087, + "step": 23986, + "teacher_loss": 0.19494515657424927 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.2930101156234741, + "learning_rate": 2.2666170436004084e-06, + "loss": 0.1355, + "step": 23987, + "teacher_loss": 0.11795730888843536 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.4901461601257324, + "learning_rate": 2.2654166841331315e-06, + "loss": 0.197, + "step": 23988, + "teacher_loss": 0.1644403040409088 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.41059041023254395, + "learning_rate": 2.2642166166316676e-06, + "loss": 0.1769, + "step": 23989, + "teacher_loss": 0.15092208981513977 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.2745780348777771, + "learning_rate": 2.2630168411235315e-06, + "loss": 0.2375, + "step": 23990, + "teacher_loss": 0.23334476351737976 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 1.0527151823043823, + "learning_rate": 2.261817357636236e-06, + "loss": 0.3216, + "step": 23991, + "teacher_loss": 0.240375816822052 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.4498971700668335, + "learning_rate": 2.260618166197276e-06, + "loss": 0.2159, + "step": 23992, + "teacher_loss": 0.1898496150970459 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.36716943979263306, + "learning_rate": 2.2594192668341417e-06, + "loss": 0.1754, + "step": 23993, + "teacher_loss": 0.1540842056274414 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.34837278723716736, + "learning_rate": 2.2582206595743323e-06, + "loss": 0.3063, + "step": 23994, + "teacher_loss": 0.30162712931632996 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.7202326059341431, + "learning_rate": 2.2570223444453203e-06, + "loss": 0.4423, + "step": 23995, + "teacher_loss": 0.41144686937332153 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.31576868891716003, + "learning_rate": 2.255824321474576e-06, + "loss": 0.2031, + "step": 23996, + "teacher_loss": 0.19055622816085815 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.8264892101287842, + "learning_rate": 2.2546265906895726e-06, + "loss": 0.221, + "step": 23997, + "teacher_loss": 0.15368813276290894 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.18294093012809753, + "learning_rate": 2.2534291521177725e-06, + "loss": 0.2155, + "step": 23998, + "teacher_loss": 0.21916446089744568 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.4606604278087616, + "learning_rate": 2.2522320057866223e-06, + "loss": 0.1811, + "step": 23999, + "teacher_loss": 0.15003514289855957 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.2972765564918518, + "learning_rate": 2.2510351517235722e-06, + "loss": 0.1687, + "step": 24000, + "teacher_loss": 0.15437576174736023 + }, + { + "epoch": 4.34, + "eval_exact_match": 80.4162724692526, + "eval_f1": 87.7545031856987, + "step": 24000 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.5671945810317993, + "learning_rate": 2.249838589956065e-06, + "loss": 0.2619, + "step": 24001, + "teacher_loss": 0.2279355823993683 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.5974215865135193, + "learning_rate": 2.2486423205115297e-06, + "loss": 0.2258, + "step": 24002, + "teacher_loss": 0.1844923496246338 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.27336937189102173, + "learning_rate": 2.2474463434173953e-06, + "loss": 0.1901, + "step": 24003, + "teacher_loss": 0.18086571991443634 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.4351341724395752, + "learning_rate": 2.246250658701086e-06, + "loss": 0.1554, + "step": 24004, + "teacher_loss": 0.12434118986129761 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.1674986481666565, + "learning_rate": 2.245055266390007e-06, + "loss": 0.1997, + "step": 24005, + "teacher_loss": 0.20327094197273254 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.5593927502632141, + "learning_rate": 2.2438601665115693e-06, + "loss": 0.2579, + "step": 24006, + "teacher_loss": 0.2244146168231964 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.7360590100288391, + "learning_rate": 2.242665359093178e-06, + "loss": 0.2534, + "step": 24007, + "teacher_loss": 0.19975420832633972 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.7808851003646851, + "learning_rate": 2.241470844162217e-06, + "loss": 0.2893, + "step": 24008, + "teacher_loss": 0.2346796691417694 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.45372313261032104, + "learning_rate": 2.2402766217460808e-06, + "loss": 0.1975, + "step": 24009, + "teacher_loss": 0.16908572614192963 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.29683762788772583, + "learning_rate": 2.239082691872143e-06, + "loss": 0.1454, + "step": 24010, + "teacher_loss": 0.12856057286262512 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.12044206261634827, + "learning_rate": 2.237889054567782e-06, + "loss": 0.214, + "step": 24011, + "teacher_loss": 0.2243853658437729 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.2856326103210449, + "learning_rate": 2.236695709860361e-06, + "loss": 0.2074, + "step": 24012, + "teacher_loss": 0.1986769735813141 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.3515746593475342, + "learning_rate": 2.2355026577772402e-06, + "loss": 0.228, + "step": 24013, + "teacher_loss": 0.2142791450023651 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.2595018744468689, + "learning_rate": 2.2343098983457754e-06, + "loss": 0.154, + "step": 24014, + "teacher_loss": 0.14223405718803406 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.2016221582889557, + "learning_rate": 2.2331174315933102e-06, + "loss": 0.2365, + "step": 24015, + "teacher_loss": 0.24041730165481567 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.43893638253211975, + "learning_rate": 2.231925257547184e-06, + "loss": 0.2073, + "step": 24016, + "teacher_loss": 0.1815786063671112 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.2347278594970703, + "learning_rate": 2.230733376234733e-06, + "loss": 0.1879, + "step": 24017, + "teacher_loss": 0.18270018696784973 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.6399128437042236, + "learning_rate": 2.2295417876832795e-06, + "loss": 0.3248, + "step": 24018, + "teacher_loss": 0.2897520661354065 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.403209924697876, + "learning_rate": 2.228350491920148e-06, + "loss": 0.1618, + "step": 24019, + "teacher_loss": 0.13497664034366608 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.64995938539505, + "learning_rate": 2.2271594889726467e-06, + "loss": 0.2449, + "step": 24020, + "teacher_loss": 0.19987425208091736 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.8262883424758911, + "learning_rate": 2.2259687788680817e-06, + "loss": 0.2806, + "step": 24021, + "teacher_loss": 0.2199312150478363 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.17706947028636932, + "learning_rate": 2.2247783616337564e-06, + "loss": 0.1899, + "step": 24022, + "teacher_loss": 0.19136342406272888 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.23855794966220856, + "learning_rate": 2.223588237296959e-06, + "loss": 0.1814, + "step": 24023, + "teacher_loss": 0.17504951357841492 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.3920797109603882, + "learning_rate": 2.2223984058849776e-06, + "loss": 0.1899, + "step": 24024, + "teacher_loss": 0.16746732592582703 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.4302196502685547, + "learning_rate": 2.221208867425096e-06, + "loss": 0.1941, + "step": 24025, + "teacher_loss": 0.16787764430046082 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.3299451470375061, + "learning_rate": 2.220019621944578e-06, + "loss": 0.2342, + "step": 24026, + "teacher_loss": 0.2235976755619049 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.15167132019996643, + "learning_rate": 2.2188306694706924e-06, + "loss": 0.2695, + "step": 24027, + "teacher_loss": 0.2825429439544678 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.592416524887085, + "learning_rate": 2.217642010030704e-06, + "loss": 0.2124, + "step": 24028, + "teacher_loss": 0.1702287197113037 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.42218273878097534, + "learning_rate": 2.2164536436518574e-06, + "loss": 0.1861, + "step": 24029, + "teacher_loss": 0.15989606082439423 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.40831851959228516, + "learning_rate": 2.215265570361406e-06, + "loss": 0.2237, + "step": 24030, + "teacher_loss": 0.20313864946365356 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 1.3227722644805908, + "learning_rate": 2.21407779018658e-06, + "loss": 0.2593, + "step": 24031, + "teacher_loss": 0.14114999771118164 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.6363925337791443, + "learning_rate": 2.2128903031546188e-06, + "loss": 0.3576, + "step": 24032, + "teacher_loss": 0.3265949487686157 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.8205530643463135, + "learning_rate": 2.2117031092927444e-06, + "loss": 0.2911, + "step": 24033, + "teacher_loss": 0.23226453363895416 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 1.0109715461730957, + "learning_rate": 2.2105162086281742e-06, + "loss": 0.3184, + "step": 24034, + "teacher_loss": 0.24147063493728638 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.5781655311584473, + "learning_rate": 2.209329601188126e-06, + "loss": 0.2042, + "step": 24035, + "teacher_loss": 0.16265341639518738 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.268660306930542, + "learning_rate": 2.2081432869997985e-06, + "loss": 0.164, + "step": 24036, + "teacher_loss": 0.1523973047733307 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.15392056107521057, + "learning_rate": 2.206957266090394e-06, + "loss": 0.113, + "step": 24037, + "teacher_loss": 0.10848838090896606 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.4632466435432434, + "learning_rate": 2.2057715384871075e-06, + "loss": 0.1578, + "step": 24038, + "teacher_loss": 0.1238139197230339 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.21561163663864136, + "learning_rate": 2.204586104217115e-06, + "loss": 0.2011, + "step": 24039, + "teacher_loss": 0.19952334463596344 + }, + { + "compression_loss": 0.0, + "epoch": 4.34, + "label_loss": 0.4543490409851074, + "learning_rate": 2.203400963307602e-06, + "loss": 0.2271, + "step": 24040, + "teacher_loss": 0.2018272876739502 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.21323370933532715, + "learning_rate": 2.202216115785743e-06, + "loss": 0.151, + "step": 24041, + "teacher_loss": 0.1441287249326706 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.6229729056358337, + "learning_rate": 2.201031561678698e-06, + "loss": 0.246, + "step": 24042, + "teacher_loss": 0.2041531801223755 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.2918744385242462, + "learning_rate": 2.199847301013619e-06, + "loss": 0.1533, + "step": 24043, + "teacher_loss": 0.13786277174949646 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.35242557525634766, + "learning_rate": 2.1986633338176702e-06, + "loss": 0.2097, + "step": 24044, + "teacher_loss": 0.1937883198261261 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.561247706413269, + "learning_rate": 2.1974796601179926e-06, + "loss": 0.2529, + "step": 24045, + "teacher_loss": 0.21868997812271118 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.1913042962551117, + "learning_rate": 2.1962962799417187e-06, + "loss": 0.1731, + "step": 24046, + "teacher_loss": 0.171085923910141 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.4552307724952698, + "learning_rate": 2.1951131933159834e-06, + "loss": 0.2249, + "step": 24047, + "teacher_loss": 0.19931060075759888 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.1914690136909485, + "learning_rate": 2.193930400267914e-06, + "loss": 0.1304, + "step": 24048, + "teacher_loss": 0.12359493970870972 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.45944905281066895, + "learning_rate": 2.1927479008246233e-06, + "loss": 0.2289, + "step": 24049, + "teacher_loss": 0.20332002639770508 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.6912100911140442, + "learning_rate": 2.191565695013226e-06, + "loss": 0.2125, + "step": 24050, + "teacher_loss": 0.1593523919582367 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.5325913429260254, + "learning_rate": 2.1903837828608282e-06, + "loss": 0.2184, + "step": 24051, + "teacher_loss": 0.1834602802991867 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.516902506351471, + "learning_rate": 2.1892021643945255e-06, + "loss": 0.1905, + "step": 24052, + "teacher_loss": 0.15426768362522125 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.428198903799057, + "learning_rate": 2.1880208396413996e-06, + "loss": 0.2726, + "step": 24053, + "teacher_loss": 0.2553304433822632 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.37251389026641846, + "learning_rate": 2.186839808628553e-06, + "loss": 0.2011, + "step": 24054, + "teacher_loss": 0.18208475410938263 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.2808970808982849, + "learning_rate": 2.1856590713830516e-06, + "loss": 0.1762, + "step": 24055, + "teacher_loss": 0.16459207236766815 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.48111385107040405, + "learning_rate": 2.1844786279319625e-06, + "loss": 0.2589, + "step": 24056, + "teacher_loss": 0.23418283462524414 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.245268315076828, + "learning_rate": 2.1832984783023633e-06, + "loss": 0.1847, + "step": 24057, + "teacher_loss": 0.17793044447898865 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.569835364818573, + "learning_rate": 2.182118622521302e-06, + "loss": 0.3982, + "step": 24058, + "teacher_loss": 0.3791613280773163 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.7276433706283569, + "learning_rate": 2.1809390606158277e-06, + "loss": 0.2101, + "step": 24059, + "teacher_loss": 0.15258993208408356 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.37281936407089233, + "learning_rate": 2.179759792612987e-06, + "loss": 0.1668, + "step": 24060, + "teacher_loss": 0.14387336373329163 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.12428450584411621, + "learning_rate": 2.178580818539822e-06, + "loss": 0.1681, + "step": 24061, + "teacher_loss": 0.1729975938796997 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.3061058223247528, + "learning_rate": 2.1774021384233526e-06, + "loss": 0.1658, + "step": 24062, + "teacher_loss": 0.15022574365139008 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.5158843994140625, + "learning_rate": 2.176223752290609e-06, + "loss": 0.2441, + "step": 24063, + "teacher_loss": 0.21386940777301788 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.3103901147842407, + "learning_rate": 2.175045660168611e-06, + "loss": 0.1868, + "step": 24064, + "teacher_loss": 0.17307725548744202 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.3477204442024231, + "learning_rate": 2.1738678620843612e-06, + "loss": 0.2126, + "step": 24065, + "teacher_loss": 0.19756081700325012 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.40253746509552, + "learning_rate": 2.1726903580648666e-06, + "loss": 0.2156, + "step": 24066, + "teacher_loss": 0.1948145180940628 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.41213327646255493, + "learning_rate": 2.171513148137128e-06, + "loss": 0.228, + "step": 24067, + "teacher_loss": 0.20753689110279083 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.3104656934738159, + "learning_rate": 2.1703362323281257e-06, + "loss": 0.1825, + "step": 24068, + "teacher_loss": 0.16827671229839325 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.2336173951625824, + "learning_rate": 2.1691596106648524e-06, + "loss": 0.156, + "step": 24069, + "teacher_loss": 0.14734283089637756 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.2792956829071045, + "learning_rate": 2.1679832831742773e-06, + "loss": 0.1352, + "step": 24070, + "teacher_loss": 0.11923306435346603 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.36622339487075806, + "learning_rate": 2.1668072498833737e-06, + "loss": 0.1936, + "step": 24071, + "teacher_loss": 0.1744169145822525 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.36221152544021606, + "learning_rate": 2.1656315108191078e-06, + "loss": 0.1902, + "step": 24072, + "teacher_loss": 0.17105789482593536 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.5037327408790588, + "learning_rate": 2.1644560660084266e-06, + "loss": 0.3178, + "step": 24073, + "teacher_loss": 0.29709190130233765 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.6849277019500732, + "learning_rate": 2.163280915478289e-06, + "loss": 0.6393, + "step": 24074, + "teacher_loss": 0.6342679858207703 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.4697837233543396, + "learning_rate": 2.1621060592556312e-06, + "loss": 0.2168, + "step": 24075, + "teacher_loss": 0.18868029117584229 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.45679864287376404, + "learning_rate": 2.1609314973673905e-06, + "loss": 0.2284, + "step": 24076, + "teacher_loss": 0.20305031538009644 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.21940001845359802, + "learning_rate": 2.1597572298405e-06, + "loss": 0.2657, + "step": 24077, + "teacher_loss": 0.2708034813404083 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.4848310947418213, + "learning_rate": 2.1585832567018767e-06, + "loss": 0.1956, + "step": 24078, + "teacher_loss": 0.16347095370292664 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.6124930381774902, + "learning_rate": 2.1574095779784385e-06, + "loss": 0.3337, + "step": 24079, + "teacher_loss": 0.30269038677215576 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.4767211973667145, + "learning_rate": 2.156236193697098e-06, + "loss": 0.2064, + "step": 24080, + "teacher_loss": 0.17635218799114227 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.23997816443443298, + "learning_rate": 2.15506310388475e-06, + "loss": 0.2028, + "step": 24081, + "teacher_loss": 0.1986789107322693 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.2863200902938843, + "learning_rate": 2.1538903085682984e-06, + "loss": 0.1676, + "step": 24082, + "teacher_loss": 0.15440011024475098 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.5700246691703796, + "learning_rate": 2.1527178077746223e-06, + "loss": 0.192, + "step": 24083, + "teacher_loss": 0.15001827478408813 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.6011705994606018, + "learning_rate": 2.1515456015306096e-06, + "loss": 0.2763, + "step": 24084, + "teacher_loss": 0.24024289846420288 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.3729507029056549, + "learning_rate": 2.1503736898631384e-06, + "loss": 0.1774, + "step": 24085, + "teacher_loss": 0.15566977858543396 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.5780166387557983, + "learning_rate": 2.14920207279907e-06, + "loss": 0.2374, + "step": 24086, + "teacher_loss": 0.1995784044265747 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.30591559410095215, + "learning_rate": 2.148030750365269e-06, + "loss": 0.1953, + "step": 24087, + "teacher_loss": 0.18295682966709137 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.36636483669281006, + "learning_rate": 2.146859722588595e-06, + "loss": 0.223, + "step": 24088, + "teacher_loss": 0.20705002546310425 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.6003850698471069, + "learning_rate": 2.1456889894958864e-06, + "loss": 0.2416, + "step": 24089, + "teacher_loss": 0.20170001685619354 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.22337886691093445, + "learning_rate": 2.144518551113992e-06, + "loss": 0.182, + "step": 24090, + "teacher_loss": 0.17737668752670288 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.513768196105957, + "learning_rate": 2.143348407469747e-06, + "loss": 0.2697, + "step": 24091, + "teacher_loss": 0.24252937734127045 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.2958924174308777, + "learning_rate": 2.142178558589976e-06, + "loss": 0.1714, + "step": 24092, + "teacher_loss": 0.15751270949840546 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.1881890445947647, + "learning_rate": 2.1410090045014985e-06, + "loss": 0.1421, + "step": 24093, + "teacher_loss": 0.13699409365653992 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.3031767010688782, + "learning_rate": 2.1398397452311323e-06, + "loss": 0.1916, + "step": 24094, + "teacher_loss": 0.1792091727256775 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.22616443037986755, + "learning_rate": 2.138670780805685e-06, + "loss": 0.1888, + "step": 24095, + "teacher_loss": 0.18464316427707672 + }, + { + "compression_loss": 0.0, + "epoch": 4.35, + "label_loss": 0.1772937774658203, + "learning_rate": 2.137502111251954e-06, + "loss": 0.1202, + "step": 24096, + "teacher_loss": 0.11382432281970978 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.20297923684120178, + "learning_rate": 2.136333736596738e-06, + "loss": 0.1417, + "step": 24097, + "teacher_loss": 0.13489598035812378 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.3368719220161438, + "learning_rate": 2.135165656866822e-06, + "loss": 0.1511, + "step": 24098, + "teacher_loss": 0.13049712777137756 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.30626314878463745, + "learning_rate": 2.1339978720889856e-06, + "loss": 0.1621, + "step": 24099, + "teacher_loss": 0.14607040584087372 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.22838753461837769, + "learning_rate": 2.1328303822900025e-06, + "loss": 0.1827, + "step": 24100, + "teacher_loss": 0.17767687141895294 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.4264363646507263, + "learning_rate": 2.1316631874966457e-06, + "loss": 0.203, + "step": 24101, + "teacher_loss": 0.1781388819217682 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.6638039350509644, + "learning_rate": 2.1304962877356692e-06, + "loss": 0.3321, + "step": 24102, + "teacher_loss": 0.29519668221473694 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.6335201263427734, + "learning_rate": 2.129329683033823e-06, + "loss": 0.2358, + "step": 24103, + "teacher_loss": 0.19159609079360962 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.3509160876274109, + "learning_rate": 2.1281633734178645e-06, + "loss": 0.2267, + "step": 24104, + "teacher_loss": 0.21285486221313477 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.4193546772003174, + "learning_rate": 2.1269973589145284e-06, + "loss": 0.164, + "step": 24105, + "teacher_loss": 0.13557904958724976 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.3707237243652344, + "learning_rate": 2.1258316395505406e-06, + "loss": 0.1814, + "step": 24106, + "teacher_loss": 0.16035783290863037 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.47226423025131226, + "learning_rate": 2.1246662153526407e-06, + "loss": 0.2141, + "step": 24107, + "teacher_loss": 0.18538369238376617 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.3969842493534088, + "learning_rate": 2.123501086347543e-06, + "loss": 0.2752, + "step": 24108, + "teacher_loss": 0.2616182565689087 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.32110410928726196, + "learning_rate": 2.122336252561957e-06, + "loss": 0.2018, + "step": 24109, + "teacher_loss": 0.18854191899299622 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.853962779045105, + "learning_rate": 2.1211717140225906e-06, + "loss": 0.2528, + "step": 24110, + "teacher_loss": 0.1860085129737854 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 1.148671269416809, + "learning_rate": 2.120007470756147e-06, + "loss": 0.2782, + "step": 24111, + "teacher_loss": 0.18146607279777527 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.10295100510120392, + "learning_rate": 2.1188435227893145e-06, + "loss": 0.1288, + "step": 24112, + "teacher_loss": 0.13170501589775085 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.3872445523738861, + "learning_rate": 2.117679870148779e-06, + "loss": 0.1878, + "step": 24113, + "teacher_loss": 0.16563282907009125 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.6574846506118774, + "learning_rate": 2.1165165128612235e-06, + "loss": 0.2303, + "step": 24114, + "teacher_loss": 0.18283995985984802 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.44447746872901917, + "learning_rate": 2.1153534509533174e-06, + "loss": 0.1941, + "step": 24115, + "teacher_loss": 0.16631552577018738 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.17659206688404083, + "learning_rate": 2.1141906844517207e-06, + "loss": 0.1412, + "step": 24116, + "teacher_loss": 0.13726702332496643 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.5166727900505066, + "learning_rate": 2.113028213383105e-06, + "loss": 0.2705, + "step": 24117, + "teacher_loss": 0.2431592345237732 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.2641560137271881, + "learning_rate": 2.111866037774115e-06, + "loss": 0.1587, + "step": 24118, + "teacher_loss": 0.14694499969482422 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.15038985013961792, + "learning_rate": 2.1107041576513917e-06, + "loss": 0.1892, + "step": 24119, + "teacher_loss": 0.19353672862052917 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.6606764197349548, + "learning_rate": 2.1095425730415796e-06, + "loss": 0.2075, + "step": 24120, + "teacher_loss": 0.15713909268379211 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.3634679913520813, + "learning_rate": 2.1083812839713117e-06, + "loss": 0.1714, + "step": 24121, + "teacher_loss": 0.15001505613327026 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.6436595320701599, + "learning_rate": 2.1072202904672056e-06, + "loss": 0.2287, + "step": 24122, + "teacher_loss": 0.18261811137199402 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.6445033550262451, + "learning_rate": 2.1060595925558844e-06, + "loss": 0.2215, + "step": 24123, + "teacher_loss": 0.1744779646396637 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.5155205726623535, + "learning_rate": 2.1048991902639623e-06, + "loss": 0.2101, + "step": 24124, + "teacher_loss": 0.17621192336082458 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.2924107313156128, + "learning_rate": 2.1037390836180366e-06, + "loss": 0.1535, + "step": 24125, + "teacher_loss": 0.13808830082416534 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.3616622984409332, + "learning_rate": 2.1025792726447083e-06, + "loss": 0.2339, + "step": 24126, + "teacher_loss": 0.21969962120056152 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.34055787324905396, + "learning_rate": 2.101419757370572e-06, + "loss": 0.176, + "step": 24127, + "teacher_loss": 0.1577366441488266 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.7559992074966431, + "learning_rate": 2.1002605378222068e-06, + "loss": 0.2821, + "step": 24128, + "teacher_loss": 0.2294563204050064 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.8462119102478027, + "learning_rate": 2.0991016140261903e-06, + "loss": 0.3052, + "step": 24129, + "teacher_loss": 0.24507129192352295 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.5615392923355103, + "learning_rate": 2.0979429860090985e-06, + "loss": 0.224, + "step": 24130, + "teacher_loss": 0.1864570528268814 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.8582161664962769, + "learning_rate": 2.096784653797489e-06, + "loss": 0.4078, + "step": 24131, + "teacher_loss": 0.3578042984008789 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.16840235888957977, + "learning_rate": 2.0956266174179263e-06, + "loss": 0.1537, + "step": 24132, + "teacher_loss": 0.1520490050315857 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.4229716658592224, + "learning_rate": 2.0944688768969496e-06, + "loss": 0.2504, + "step": 24133, + "teacher_loss": 0.2312595546245575 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.353396475315094, + "learning_rate": 2.093311432261111e-06, + "loss": 0.1712, + "step": 24134, + "teacher_loss": 0.15091989934444427 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.21631215512752533, + "learning_rate": 2.0921542835369472e-06, + "loss": 0.1712, + "step": 24135, + "teacher_loss": 0.16614016890525818 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.4670332074165344, + "learning_rate": 2.0909974307509826e-06, + "loss": 0.466, + "step": 24136, + "teacher_loss": 0.4658501148223877 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.199095219373703, + "learning_rate": 2.0898408739297476e-06, + "loss": 0.213, + "step": 24137, + "teacher_loss": 0.21459971368312836 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.36358705163002014, + "learning_rate": 2.0886846130997504e-06, + "loss": 0.2243, + "step": 24138, + "teacher_loss": 0.20887590944766998 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.1979871690273285, + "learning_rate": 2.087528648287507e-06, + "loss": 0.1339, + "step": 24139, + "teacher_loss": 0.12672746181488037 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.5115912556648254, + "learning_rate": 2.0863729795195196e-06, + "loss": 0.1766, + "step": 24140, + "teacher_loss": 0.13942985236644745 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.5048018097877502, + "learning_rate": 2.08521760682228e-06, + "loss": 0.2367, + "step": 24141, + "teacher_loss": 0.20696169137954712 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.2570592761039734, + "learning_rate": 2.084062530222284e-06, + "loss": 0.1491, + "step": 24142, + "teacher_loss": 0.13714447617530823 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.5125433206558228, + "learning_rate": 2.082907749746005e-06, + "loss": 0.2377, + "step": 24143, + "teacher_loss": 0.20713132619857788 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.8262771368026733, + "learning_rate": 2.081753265419925e-06, + "loss": 0.2367, + "step": 24144, + "teacher_loss": 0.1712222695350647 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.4055216908454895, + "learning_rate": 2.080599077270515e-06, + "loss": 0.2144, + "step": 24145, + "teacher_loss": 0.19317930936813354 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.824671745300293, + "learning_rate": 2.079445185324231e-06, + "loss": 0.3085, + "step": 24146, + "teacher_loss": 0.25112730264663696 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.3611491918563843, + "learning_rate": 2.0782915896075305e-06, + "loss": 0.1831, + "step": 24147, + "teacher_loss": 0.16334109008312225 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.3308403491973877, + "learning_rate": 2.077138290146867e-06, + "loss": 0.1913, + "step": 24148, + "teacher_loss": 0.1758241355419159 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.40630969405174255, + "learning_rate": 2.075985286968673e-06, + "loss": 0.2998, + "step": 24149, + "teacher_loss": 0.2879410982131958 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.25508078932762146, + "learning_rate": 2.0748325800993884e-06, + "loss": 0.1606, + "step": 24150, + "teacher_loss": 0.150080144405365 + }, + { + "compression_loss": 0.0, + "epoch": 4.36, + "label_loss": 0.3130847215652466, + "learning_rate": 2.073680169565445e-06, + "loss": 0.1843, + "step": 24151, + "teacher_loss": 0.16996704041957855 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.23449155688285828, + "learning_rate": 2.0725280553932552e-06, + "loss": 0.1913, + "step": 24152, + "teacher_loss": 0.1865461766719818 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.38948217034339905, + "learning_rate": 2.071376237609241e-06, + "loss": 0.1799, + "step": 24153, + "teacher_loss": 0.15664543211460114 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.27497944235801697, + "learning_rate": 2.07022471623981e-06, + "loss": 0.1542, + "step": 24154, + "teacher_loss": 0.14075277745723724 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.4171023964881897, + "learning_rate": 2.0690734913113603e-06, + "loss": 0.1628, + "step": 24155, + "teacher_loss": 0.1345631182193756 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.3434973359107971, + "learning_rate": 2.067922562850283e-06, + "loss": 0.2004, + "step": 24156, + "teacher_loss": 0.18453413248062134 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.3588903546333313, + "learning_rate": 2.0667719308829686e-06, + "loss": 0.2062, + "step": 24157, + "teacher_loss": 0.18921679258346558 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.45504844188690186, + "learning_rate": 2.0656215954358025e-06, + "loss": 0.1888, + "step": 24158, + "teacher_loss": 0.15918779373168945 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.26276034116744995, + "learning_rate": 2.064471556535151e-06, + "loss": 0.2209, + "step": 24159, + "teacher_loss": 0.21629098057746887 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 1.4562435150146484, + "learning_rate": 2.0633218142073833e-06, + "loss": 0.3214, + "step": 24160, + "teacher_loss": 0.19531689584255219 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.3991554379463196, + "learning_rate": 2.062172368478863e-06, + "loss": 0.2685, + "step": 24161, + "teacher_loss": 0.25393807888031006 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.44233760237693787, + "learning_rate": 2.061023219375938e-06, + "loss": 0.2474, + "step": 24162, + "teacher_loss": 0.22574535012245178 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.48257550597190857, + "learning_rate": 2.0598743669249565e-06, + "loss": 0.2549, + "step": 24163, + "teacher_loss": 0.2295970618724823 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.4222247004508972, + "learning_rate": 2.0587258111522635e-06, + "loss": 0.168, + "step": 24164, + "teacher_loss": 0.13978828489780426 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.684607982635498, + "learning_rate": 2.0575775520841878e-06, + "loss": 0.2716, + "step": 24165, + "teacher_loss": 0.22576190531253815 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.275020956993103, + "learning_rate": 2.0564295897470487e-06, + "loss": 0.1678, + "step": 24166, + "teacher_loss": 0.15589390695095062 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.33841192722320557, + "learning_rate": 2.055281924167178e-06, + "loss": 0.2097, + "step": 24167, + "teacher_loss": 0.1953493058681488 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.2886737287044525, + "learning_rate": 2.054134555370884e-06, + "loss": 0.1733, + "step": 24168, + "teacher_loss": 0.16047939658164978 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.48450154066085815, + "learning_rate": 2.0529874833844662e-06, + "loss": 0.1902, + "step": 24169, + "teacher_loss": 0.1575442999601364 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.18101289868354797, + "learning_rate": 2.0518407082342294e-06, + "loss": 0.148, + "step": 24170, + "teacher_loss": 0.14432629942893982 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.5600560307502747, + "learning_rate": 2.0506942299464672e-06, + "loss": 0.2153, + "step": 24171, + "teacher_loss": 0.17702136933803558 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.3536297082901001, + "learning_rate": 2.04954804854746e-06, + "loss": 0.2491, + "step": 24172, + "teacher_loss": 0.23747684061527252 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.25043439865112305, + "learning_rate": 2.048402164063487e-06, + "loss": 0.1877, + "step": 24173, + "teacher_loss": 0.18071895837783813 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.5213544368743896, + "learning_rate": 2.0472565765208256e-06, + "loss": 0.2366, + "step": 24174, + "teacher_loss": 0.20491428673267365 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.1874285638332367, + "learning_rate": 2.046111285945733e-06, + "loss": 0.1703, + "step": 24175, + "teacher_loss": 0.16837960481643677 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.20623981952667236, + "learning_rate": 2.044966292364472e-06, + "loss": 0.1715, + "step": 24176, + "teacher_loss": 0.16760051250457764 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.29263463616371155, + "learning_rate": 2.043821595803296e-06, + "loss": 0.1681, + "step": 24177, + "teacher_loss": 0.1542688012123108 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.4170669913291931, + "learning_rate": 2.0426771962884437e-06, + "loss": 0.1615, + "step": 24178, + "teacher_loss": 0.13315586745738983 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.2835770845413208, + "learning_rate": 2.0415330938461503e-06, + "loss": 0.1872, + "step": 24179, + "teacher_loss": 0.17653462290763855 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.32481706142425537, + "learning_rate": 2.0403892885026587e-06, + "loss": 0.2032, + "step": 24180, + "teacher_loss": 0.18967324495315552 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.622667133808136, + "learning_rate": 2.039245780284185e-06, + "loss": 0.2518, + "step": 24181, + "teacher_loss": 0.21059229969978333 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.2885042726993561, + "learning_rate": 2.0381025692169437e-06, + "loss": 0.1563, + "step": 24182, + "teacher_loss": 0.14164510369300842 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.3007691502571106, + "learning_rate": 2.0369596553271496e-06, + "loss": 0.1683, + "step": 24183, + "teacher_loss": 0.15352830290794373 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.6698163747787476, + "learning_rate": 2.0358170386410072e-06, + "loss": 0.2108, + "step": 24184, + "teacher_loss": 0.1597941815853119 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 1.22364342212677, + "learning_rate": 2.034674719184709e-06, + "loss": 0.3441, + "step": 24185, + "teacher_loss": 0.24633751809597015 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.1892719566822052, + "learning_rate": 2.0335326969844476e-06, + "loss": 0.1521, + "step": 24186, + "teacher_loss": 0.14792287349700928 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.7989034652709961, + "learning_rate": 2.0323909720664096e-06, + "loss": 0.305, + "step": 24187, + "teacher_loss": 0.25011682510375977 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.35490041971206665, + "learning_rate": 2.0312495444567645e-06, + "loss": 0.1823, + "step": 24188, + "teacher_loss": 0.16307701170444489 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.587756335735321, + "learning_rate": 2.0301084141816836e-06, + "loss": 0.1952, + "step": 24189, + "teacher_loss": 0.1515846699476242 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.39118218421936035, + "learning_rate": 2.0289675812673363e-06, + "loss": 0.1856, + "step": 24190, + "teacher_loss": 0.1627466380596161 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.5157334208488464, + "learning_rate": 2.027827045739868e-06, + "loss": 0.1939, + "step": 24191, + "teacher_loss": 0.1581641435623169 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.25750893354415894, + "learning_rate": 2.026686807625437e-06, + "loss": 0.1712, + "step": 24192, + "teacher_loss": 0.16165369749069214 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.4656542241573334, + "learning_rate": 2.0255468669501777e-06, + "loss": 0.1809, + "step": 24193, + "teacher_loss": 0.14931261539459229 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.36860764026641846, + "learning_rate": 2.0244072237402307e-06, + "loss": 0.1864, + "step": 24194, + "teacher_loss": 0.16619321703910828 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.5511794090270996, + "learning_rate": 2.023267878021725e-06, + "loss": 0.2364, + "step": 24195, + "teacher_loss": 0.20137529075145721 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.47485533356666565, + "learning_rate": 2.0221288298207787e-06, + "loss": 0.216, + "step": 24196, + "teacher_loss": 0.1872279942035675 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.7470526695251465, + "learning_rate": 2.0209900791635116e-06, + "loss": 0.1955, + "step": 24197, + "teacher_loss": 0.1342616230249405 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.4112088084220886, + "learning_rate": 2.0198516260760254e-06, + "loss": 0.2587, + "step": 24198, + "teacher_loss": 0.24180760979652405 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.312944620847702, + "learning_rate": 2.0187134705844233e-06, + "loss": 0.1616, + "step": 24199, + "teacher_loss": 0.1447295844554901 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.4228675961494446, + "learning_rate": 2.017575612714807e-06, + "loss": 0.3033, + "step": 24200, + "teacher_loss": 0.2900681495666504 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.28748029470443726, + "learning_rate": 2.0164380524932534e-06, + "loss": 0.1755, + "step": 24201, + "teacher_loss": 0.1630670726299286 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.24348270893096924, + "learning_rate": 2.01530078994585e-06, + "loss": 0.1714, + "step": 24202, + "teacher_loss": 0.1634211540222168 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.3653257489204407, + "learning_rate": 2.0141638250986714e-06, + "loss": 0.2104, + "step": 24203, + "teacher_loss": 0.19316327571868896 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.32686927914619446, + "learning_rate": 2.0130271579777816e-06, + "loss": 0.2495, + "step": 24204, + "teacher_loss": 0.24090583622455597 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.7371385097503662, + "learning_rate": 2.0118907886092443e-06, + "loss": 0.288, + "step": 24205, + "teacher_loss": 0.23814445734024048 + }, + { + "compression_loss": 0.0, + "epoch": 4.37, + "label_loss": 0.3627144396305084, + "learning_rate": 2.010754717019107e-06, + "loss": 0.2264, + "step": 24206, + "teacher_loss": 0.2112187147140503 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.33188870549201965, + "learning_rate": 2.0096189432334194e-06, + "loss": 0.1833, + "step": 24207, + "teacher_loss": 0.1668439656496048 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.24071869254112244, + "learning_rate": 2.0084834672782272e-06, + "loss": 0.1629, + "step": 24208, + "teacher_loss": 0.15422964096069336 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.23737987875938416, + "learning_rate": 2.007348289179554e-06, + "loss": 0.2118, + "step": 24209, + "teacher_loss": 0.20901280641555786 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.612793505191803, + "learning_rate": 2.0062134089634317e-06, + "loss": 0.3046, + "step": 24210, + "teacher_loss": 0.2703706622123718 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.6180058717727661, + "learning_rate": 2.005078826655882e-06, + "loss": 0.2836, + "step": 24211, + "teacher_loss": 0.24641090631484985 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.42693600058555603, + "learning_rate": 2.003944542282909e-06, + "loss": 0.2215, + "step": 24212, + "teacher_loss": 0.19866369664669037 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.351433664560318, + "learning_rate": 2.002810555870526e-06, + "loss": 0.1612, + "step": 24213, + "teacher_loss": 0.140067458152771 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.3575143814086914, + "learning_rate": 2.001676867444731e-06, + "loss": 0.1898, + "step": 24214, + "teacher_loss": 0.17114710807800293 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.36525657773017883, + "learning_rate": 2.0005434770315145e-06, + "loss": 0.2554, + "step": 24215, + "teacher_loss": 0.24320705235004425 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.21546344459056854, + "learning_rate": 1.9994103846568553e-06, + "loss": 0.1538, + "step": 24216, + "teacher_loss": 0.14692160487174988 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.1961396187543869, + "learning_rate": 1.9982775903467438e-06, + "loss": 0.158, + "step": 24217, + "teacher_loss": 0.15379637479782104 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.7073092460632324, + "learning_rate": 1.9971450941271462e-06, + "loss": 0.5855, + "step": 24218, + "teacher_loss": 0.5719879865646362 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.5682158470153809, + "learning_rate": 1.9960128960240255e-06, + "loss": 0.3044, + "step": 24219, + "teacher_loss": 0.2750796675682068 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.6237989068031311, + "learning_rate": 1.99488099606334e-06, + "loss": 0.3087, + "step": 24220, + "teacher_loss": 0.2736767828464508 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.37909480929374695, + "learning_rate": 1.993749394271045e-06, + "loss": 0.229, + "step": 24221, + "teacher_loss": 0.21229872107505798 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.6887695789337158, + "learning_rate": 1.992618090673079e-06, + "loss": 0.2414, + "step": 24222, + "teacher_loss": 0.19167444109916687 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.20408578217029572, + "learning_rate": 1.9914870852953815e-06, + "loss": 0.1556, + "step": 24223, + "teacher_loss": 0.15017807483673096 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.3909336030483246, + "learning_rate": 1.990356378163887e-06, + "loss": 0.1596, + "step": 24224, + "teacher_loss": 0.133936807513237 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.3879234492778778, + "learning_rate": 1.9892259693045156e-06, + "loss": 0.1996, + "step": 24225, + "teacher_loss": 0.17866727709770203 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.9407852292060852, + "learning_rate": 1.9880958587431775e-06, + "loss": 0.3175, + "step": 24226, + "teacher_loss": 0.24827361106872559 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.7187612652778625, + "learning_rate": 1.9869660465057965e-06, + "loss": 0.2416, + "step": 24227, + "teacher_loss": 0.18862125277519226 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.6091758012771606, + "learning_rate": 1.985836532618269e-06, + "loss": 0.2576, + "step": 24228, + "teacher_loss": 0.218548983335495 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.20917585492134094, + "learning_rate": 1.984707317106485e-06, + "loss": 0.2206, + "step": 24229, + "teacher_loss": 0.22184666991233826 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.28677740693092346, + "learning_rate": 1.9835783999963463e-06, + "loss": 0.1687, + "step": 24230, + "teacher_loss": 0.15562953054904938 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.5958304405212402, + "learning_rate": 1.9824497813137303e-06, + "loss": 0.2335, + "step": 24231, + "teacher_loss": 0.19329044222831726 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.9408868551254272, + "learning_rate": 1.9813214610845083e-06, + "loss": 0.2803, + "step": 24232, + "teacher_loss": 0.20685502886772156 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.33608466386795044, + "learning_rate": 1.980193439334554e-06, + "loss": 0.2287, + "step": 24233, + "teacher_loss": 0.2168155312538147 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.40105342864990234, + "learning_rate": 1.9790657160897317e-06, + "loss": 0.1859, + "step": 24234, + "teacher_loss": 0.16195125877857208 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.5643248558044434, + "learning_rate": 1.9779382913758898e-06, + "loss": 0.2499, + "step": 24235, + "teacher_loss": 0.2149730622768402 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.6095297932624817, + "learning_rate": 1.976811165218882e-06, + "loss": 0.2231, + "step": 24236, + "teacher_loss": 0.180125892162323 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.33640748262405396, + "learning_rate": 1.9756843376445512e-06, + "loss": 0.259, + "step": 24237, + "teacher_loss": 0.25038206577301025 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.6414741277694702, + "learning_rate": 1.974557808678726e-06, + "loss": 0.2465, + "step": 24238, + "teacher_loss": 0.20258015394210815 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.46503180265426636, + "learning_rate": 1.973431578347239e-06, + "loss": 0.2662, + "step": 24239, + "teacher_loss": 0.24408169090747833 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.6486656665802002, + "learning_rate": 1.9723056466759116e-06, + "loss": 0.205, + "step": 24240, + "teacher_loss": 0.1556711494922638 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.22172841429710388, + "learning_rate": 1.9711800136905585e-06, + "loss": 0.1537, + "step": 24241, + "teacher_loss": 0.1461057811975479 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.27311083674430847, + "learning_rate": 1.9700546794169804e-06, + "loss": 0.2176, + "step": 24242, + "teacher_loss": 0.21142545342445374 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.42104649543762207, + "learning_rate": 1.968929643880984e-06, + "loss": 0.2236, + "step": 24243, + "teacher_loss": 0.20164865255355835 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.1840902864933014, + "learning_rate": 1.967804907108365e-06, + "loss": 0.2031, + "step": 24244, + "teacher_loss": 0.20526769757270813 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.24259570240974426, + "learning_rate": 1.9666804691249043e-06, + "loss": 0.149, + "step": 24245, + "teacher_loss": 0.13858619332313538 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.4887298047542572, + "learning_rate": 1.965556329956383e-06, + "loss": 0.2336, + "step": 24246, + "teacher_loss": 0.20521315932273865 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.5103280544281006, + "learning_rate": 1.9644324896285787e-06, + "loss": 0.2249, + "step": 24247, + "teacher_loss": 0.19322550296783447 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.4663625955581665, + "learning_rate": 1.9633089481672528e-06, + "loss": 0.2409, + "step": 24248, + "teacher_loss": 0.21582567691802979 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.10184285044670105, + "learning_rate": 1.9621857055981664e-06, + "loss": 0.1671, + "step": 24249, + "teacher_loss": 0.17435456812381744 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.5180944800376892, + "learning_rate": 1.961062761947075e-06, + "loss": 0.2385, + "step": 24250, + "teacher_loss": 0.20748373866081238 + }, + { + "epoch": 4.38, + "eval_exact_match": 80.40681173131505, + "eval_f1": 87.699795173519, + "step": 24250 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.6319336891174316, + "learning_rate": 1.9599401172397186e-06, + "loss": 0.1727, + "step": 24251, + "teacher_loss": 0.12171150743961334 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.37688523530960083, + "learning_rate": 1.9588177715018395e-06, + "loss": 0.168, + "step": 24252, + "teacher_loss": 0.1447594165802002 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.6969236135482788, + "learning_rate": 1.9576957247591724e-06, + "loss": 0.1678, + "step": 24253, + "teacher_loss": 0.10901831835508347 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.2916853725910187, + "learning_rate": 1.9565739770374367e-06, + "loss": 0.2035, + "step": 24254, + "teacher_loss": 0.19373945891857147 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.3300135135650635, + "learning_rate": 1.955452528362357e-06, + "loss": 0.1741, + "step": 24255, + "teacher_loss": 0.1567775011062622 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.4738526940345764, + "learning_rate": 1.954331378759637e-06, + "loss": 0.2582, + "step": 24256, + "teacher_loss": 0.23427650332450867 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.422522634267807, + "learning_rate": 1.9532105282549856e-06, + "loss": 0.2011, + "step": 24257, + "teacher_loss": 0.17644914984703064 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.43353402614593506, + "learning_rate": 1.9520899768741045e-06, + "loss": 0.2792, + "step": 24258, + "teacher_loss": 0.26208794116973877 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.1121024638414383, + "learning_rate": 1.9509697246426773e-06, + "loss": 0.23, + "step": 24259, + "teacher_loss": 0.24314171075820923 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.3976435363292694, + "learning_rate": 1.9498497715863943e-06, + "loss": 0.2561, + "step": 24260, + "teacher_loss": 0.24034327268600464 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.590404212474823, + "learning_rate": 1.948730117730926e-06, + "loss": 0.2189, + "step": 24261, + "teacher_loss": 0.17764912545681 + }, + { + "compression_loss": 0.0, + "epoch": 4.38, + "label_loss": 0.5480066537857056, + "learning_rate": 1.9476107631019476e-06, + "loss": 0.2518, + "step": 24262, + "teacher_loss": 0.2189018577337265 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.24600154161453247, + "learning_rate": 1.946491707725122e-06, + "loss": 0.194, + "step": 24263, + "teacher_loss": 0.18817198276519775 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.4572528600692749, + "learning_rate": 1.945372951626104e-06, + "loss": 0.2135, + "step": 24264, + "teacher_loss": 0.1864059865474701 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.3452264070510864, + "learning_rate": 1.9442544948305468e-06, + "loss": 0.2125, + "step": 24265, + "teacher_loss": 0.19775322079658508 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.47714322805404663, + "learning_rate": 1.943136337364089e-06, + "loss": 0.2063, + "step": 24266, + "teacher_loss": 0.17618080973625183 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.34876763820648193, + "learning_rate": 1.9420184792523675e-06, + "loss": 0.1749, + "step": 24267, + "teacher_loss": 0.15558937191963196 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.460848867893219, + "learning_rate": 1.940900920521015e-06, + "loss": 0.1925, + "step": 24268, + "teacher_loss": 0.16267919540405273 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.2802533209323883, + "learning_rate": 1.939783661195649e-06, + "loss": 0.1656, + "step": 24269, + "teacher_loss": 0.15284088253974915 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.19455453753471375, + "learning_rate": 1.9386667013018855e-06, + "loss": 0.1705, + "step": 24270, + "teacher_loss": 0.16787326335906982 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.5968358516693115, + "learning_rate": 1.9375500408653398e-06, + "loss": 0.2561, + "step": 24271, + "teacher_loss": 0.2182273268699646 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.13981568813323975, + "learning_rate": 1.936433679911604e-06, + "loss": 0.1604, + "step": 24272, + "teacher_loss": 0.1627308577299118 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.7101109623908997, + "learning_rate": 1.935317618466278e-06, + "loss": 0.5776, + "step": 24273, + "teacher_loss": 0.5629128813743591 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.31860584020614624, + "learning_rate": 1.9342018565549514e-06, + "loss": 0.1414, + "step": 24274, + "teacher_loss": 0.12166139483451843 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.23628222942352295, + "learning_rate": 1.9330863942032017e-06, + "loss": 0.1594, + "step": 24275, + "teacher_loss": 0.15084734559059143 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.1805683970451355, + "learning_rate": 1.931971231436602e-06, + "loss": 0.1494, + "step": 24276, + "teacher_loss": 0.1459105908870697 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.256730854511261, + "learning_rate": 1.9308563682807255e-06, + "loss": 0.1899, + "step": 24277, + "teacher_loss": 0.18248644471168518 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.4917605221271515, + "learning_rate": 1.9297418047611315e-06, + "loss": 0.2464, + "step": 24278, + "teacher_loss": 0.21916818618774414 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.7212333083152771, + "learning_rate": 1.9286275409033628e-06, + "loss": 0.3114, + "step": 24279, + "teacher_loss": 0.26587826013565063 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.39056336879730225, + "learning_rate": 1.9275135767329803e-06, + "loss": 0.2073, + "step": 24280, + "teacher_loss": 0.18690626323223114 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.14089876413345337, + "learning_rate": 1.9263999122755195e-06, + "loss": 0.1672, + "step": 24281, + "teacher_loss": 0.17017263174057007 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.5980688333511353, + "learning_rate": 1.925286547556509e-06, + "loss": 0.236, + "step": 24282, + "teacher_loss": 0.19576843082904816 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.713057279586792, + "learning_rate": 1.924173482601477e-06, + "loss": 0.304, + "step": 24283, + "teacher_loss": 0.25855037569999695 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.14567795395851135, + "learning_rate": 1.923060717435947e-06, + "loss": 0.1405, + "step": 24284, + "teacher_loss": 0.1399250626564026 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.6200891733169556, + "learning_rate": 1.921948252085425e-06, + "loss": 0.2632, + "step": 24285, + "teacher_loss": 0.22357742488384247 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.2569945454597473, + "learning_rate": 1.9208360865754197e-06, + "loss": 0.284, + "step": 24286, + "teacher_loss": 0.2869962453842163 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.25493282079696655, + "learning_rate": 1.9197242209314336e-06, + "loss": 0.2105, + "step": 24287, + "teacher_loss": 0.20560970902442932 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 1.1352591514587402, + "learning_rate": 1.918612655178954e-06, + "loss": 0.3576, + "step": 24288, + "teacher_loss": 0.27114880084991455 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.3497392535209656, + "learning_rate": 1.917501389343459e-06, + "loss": 0.1734, + "step": 24289, + "teacher_loss": 0.15375816822052002 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.4669134020805359, + "learning_rate": 1.91639042345044e-06, + "loss": 0.2497, + "step": 24290, + "teacher_loss": 0.2255280613899231 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.3916887044906616, + "learning_rate": 1.9152797575253627e-06, + "loss": 0.2167, + "step": 24291, + "teacher_loss": 0.19721734523773193 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.24773097038269043, + "learning_rate": 1.9141693915936874e-06, + "loss": 0.1716, + "step": 24292, + "teacher_loss": 0.16312453150749207 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.4091212749481201, + "learning_rate": 1.9130593256808736e-06, + "loss": 0.1854, + "step": 24293, + "teacher_loss": 0.16050340235233307 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.6714450716972351, + "learning_rate": 1.9119495598123776e-06, + "loss": 0.2372, + "step": 24294, + "teacher_loss": 0.18894308805465698 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.49575275182724, + "learning_rate": 1.910840094013634e-06, + "loss": 0.2896, + "step": 24295, + "teacher_loss": 0.26666566729545593 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.36540377140045166, + "learning_rate": 1.9097309283100823e-06, + "loss": 0.1822, + "step": 24296, + "teacher_loss": 0.16179600358009338 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.5823034048080444, + "learning_rate": 1.9086220627271587e-06, + "loss": 0.229, + "step": 24297, + "teacher_loss": 0.1897973120212555 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.7699387073516846, + "learning_rate": 1.9075134972902787e-06, + "loss": 0.2583, + "step": 24298, + "teacher_loss": 0.2014748901128769 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.19820314645767212, + "learning_rate": 1.9064052320248594e-06, + "loss": 0.172, + "step": 24299, + "teacher_loss": 0.16913267970085144 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.25550639629364014, + "learning_rate": 1.9052972669563157e-06, + "loss": 0.1602, + "step": 24300, + "teacher_loss": 0.1496301293373108 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.3392167091369629, + "learning_rate": 1.904189602110044e-06, + "loss": 0.2039, + "step": 24301, + "teacher_loss": 0.18881529569625854 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.3752850294113159, + "learning_rate": 1.9030822375114375e-06, + "loss": 0.2095, + "step": 24302, + "teacher_loss": 0.19105173647403717 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.15167555212974548, + "learning_rate": 1.9019751731858926e-06, + "loss": 0.1629, + "step": 24303, + "teacher_loss": 0.16415277123451233 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.1904190182685852, + "learning_rate": 1.9008684091587886e-06, + "loss": 0.1683, + "step": 24304, + "teacher_loss": 0.16580118238925934 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.565811812877655, + "learning_rate": 1.8997619454554955e-06, + "loss": 0.2519, + "step": 24305, + "teacher_loss": 0.2170458436012268 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.3874053955078125, + "learning_rate": 1.8986557821013844e-06, + "loss": 0.174, + "step": 24306, + "teacher_loss": 0.15026253461837769 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.5449634194374084, + "learning_rate": 1.8975499191218188e-06, + "loss": 0.3114, + "step": 24307, + "teacher_loss": 0.2854386270046234 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.6796234250068665, + "learning_rate": 1.896444356542148e-06, + "loss": 0.1904, + "step": 24308, + "teacher_loss": 0.13602030277252197 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.35378479957580566, + "learning_rate": 1.895339094387722e-06, + "loss": 0.221, + "step": 24309, + "teacher_loss": 0.20626473426818848 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.19117116928100586, + "learning_rate": 1.8942341326838835e-06, + "loss": 0.1707, + "step": 24310, + "teacher_loss": 0.16844549775123596 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.35521265864372253, + "learning_rate": 1.8931294714559593e-06, + "loss": 0.1479, + "step": 24311, + "teacher_loss": 0.12484799325466156 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.5196978449821472, + "learning_rate": 1.8920251107292808e-06, + "loss": 0.2542, + "step": 24312, + "teacher_loss": 0.22468310594558716 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.7515823841094971, + "learning_rate": 1.8909210505291691e-06, + "loss": 0.2975, + "step": 24313, + "teacher_loss": 0.24701833724975586 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.21503028273582458, + "learning_rate": 1.8898172908809324e-06, + "loss": 0.1975, + "step": 24314, + "teacher_loss": 0.1955067366361618 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.30153951048851013, + "learning_rate": 1.8887138318098807e-06, + "loss": 0.1994, + "step": 24315, + "teacher_loss": 0.1880682408809662 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.3739209473133087, + "learning_rate": 1.8876106733413083e-06, + "loss": 0.2047, + "step": 24316, + "teacher_loss": 0.18591301143169403 + }, + { + "compression_loss": 0.0, + "epoch": 4.39, + "label_loss": 0.5987281799316406, + "learning_rate": 1.8865078155005106e-06, + "loss": 0.2801, + "step": 24317, + "teacher_loss": 0.2447076439857483 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.15823659300804138, + "learning_rate": 1.8854052583127769e-06, + "loss": 0.1978, + "step": 24318, + "teacher_loss": 0.20219901204109192 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.39573559165000916, + "learning_rate": 1.884303001803377e-06, + "loss": 0.2103, + "step": 24319, + "teacher_loss": 0.18969517946243286 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.3875979781150818, + "learning_rate": 1.8832010459975858e-06, + "loss": 0.1989, + "step": 24320, + "teacher_loss": 0.1778961718082428 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.4808189868927002, + "learning_rate": 1.8820993909206713e-06, + "loss": 0.1874, + "step": 24321, + "teacher_loss": 0.1548454463481903 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.5599536895751953, + "learning_rate": 1.8809980365978851e-06, + "loss": 0.2237, + "step": 24322, + "teacher_loss": 0.1863691806793213 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.41733264923095703, + "learning_rate": 1.8798969830544853e-06, + "loss": 0.2391, + "step": 24323, + "teacher_loss": 0.219307541847229 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.423758327960968, + "learning_rate": 1.878796230315707e-06, + "loss": 0.2184, + "step": 24324, + "teacher_loss": 0.19557365775108337 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.1972811073064804, + "learning_rate": 1.8776957784067928e-06, + "loss": 0.2436, + "step": 24325, + "teacher_loss": 0.24880138039588928 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.24348028004169464, + "learning_rate": 1.8765956273529744e-06, + "loss": 0.2145, + "step": 24326, + "teacher_loss": 0.21124625205993652 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.3601433336734772, + "learning_rate": 1.87549577717947e-06, + "loss": 0.1832, + "step": 24327, + "teacher_loss": 0.1635204702615738 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.6053407192230225, + "learning_rate": 1.8743962279115012e-06, + "loss": 0.2477, + "step": 24328, + "teacher_loss": 0.2080015391111374 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.4397616684436798, + "learning_rate": 1.8732969795742694e-06, + "loss": 0.2447, + "step": 24329, + "teacher_loss": 0.22301416099071503 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.33742594718933105, + "learning_rate": 1.8721980321929843e-06, + "loss": 0.1857, + "step": 24330, + "teacher_loss": 0.16884958744049072 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.25857770442962646, + "learning_rate": 1.871099385792841e-06, + "loss": 0.2033, + "step": 24331, + "teacher_loss": 0.1971796452999115 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 1.1512397527694702, + "learning_rate": 1.8700010403990225e-06, + "loss": 0.2813, + "step": 24332, + "teacher_loss": 0.18466314673423767 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.9605982899665833, + "learning_rate": 1.8689029960367154e-06, + "loss": 0.2522, + "step": 24333, + "teacher_loss": 0.17345799505710602 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.24304628372192383, + "learning_rate": 1.8678052527310963e-06, + "loss": 0.1672, + "step": 24334, + "teacher_loss": 0.15879777073860168 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.14222028851509094, + "learning_rate": 1.8667078105073283e-06, + "loss": 0.1743, + "step": 24335, + "teacher_loss": 0.17785710096359253 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.5630488991737366, + "learning_rate": 1.8656106693905729e-06, + "loss": 0.2517, + "step": 24336, + "teacher_loss": 0.2171015441417694 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.6622228622436523, + "learning_rate": 1.8645138294059899e-06, + "loss": 0.2391, + "step": 24337, + "teacher_loss": 0.19205957651138306 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.24015012383460999, + "learning_rate": 1.8634172905787228e-06, + "loss": 0.1882, + "step": 24338, + "teacher_loss": 0.18239492177963257 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.3334270119667053, + "learning_rate": 1.8623210529339047e-06, + "loss": 0.1637, + "step": 24339, + "teacher_loss": 0.1448170393705368 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.6739846467971802, + "learning_rate": 1.861225116496682e-06, + "loss": 0.2778, + "step": 24340, + "teacher_loss": 0.23375552892684937 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.6571506261825562, + "learning_rate": 1.8601294812921765e-06, + "loss": 0.2483, + "step": 24341, + "teacher_loss": 0.2028503119945526 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.9343421459197998, + "learning_rate": 1.859034147345503e-06, + "loss": 0.4066, + "step": 24342, + "teacher_loss": 0.34793543815612793 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.3712049722671509, + "learning_rate": 1.8579391146817781e-06, + "loss": 0.2406, + "step": 24343, + "teacher_loss": 0.22604236006736755 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.3520374000072479, + "learning_rate": 1.8568443833261101e-06, + "loss": 0.2294, + "step": 24344, + "teacher_loss": 0.21577146649360657 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.33999398350715637, + "learning_rate": 1.8557499533035922e-06, + "loss": 0.206, + "step": 24345, + "teacher_loss": 0.19111041724681854 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.4820348918437958, + "learning_rate": 1.8546558246393191e-06, + "loss": 0.2729, + "step": 24346, + "teacher_loss": 0.24960818886756897 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.45540502667427063, + "learning_rate": 1.8535619973583795e-06, + "loss": 0.239, + "step": 24347, + "teacher_loss": 0.21490515768527985 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.3991914689540863, + "learning_rate": 1.852468471485848e-06, + "loss": 0.2358, + "step": 24348, + "teacher_loss": 0.2175983190536499 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.26334309577941895, + "learning_rate": 1.8513752470467897e-06, + "loss": 0.172, + "step": 24349, + "teacher_loss": 0.16186802089214325 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.6084012985229492, + "learning_rate": 1.850282324066283e-06, + "loss": 0.211, + "step": 24350, + "teacher_loss": 0.1668529361486435 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.37073570489883423, + "learning_rate": 1.8491897025693761e-06, + "loss": 0.1716, + "step": 24351, + "teacher_loss": 0.14951342344284058 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.44495195150375366, + "learning_rate": 1.8480973825811154e-06, + "loss": 0.1916, + "step": 24352, + "teacher_loss": 0.16349640488624573 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.6999865174293518, + "learning_rate": 1.847005364126556e-06, + "loss": 0.2546, + "step": 24353, + "teacher_loss": 0.2051447182893753 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 1.2140017747879028, + "learning_rate": 1.8459136472307297e-06, + "loss": 0.3739, + "step": 24354, + "teacher_loss": 0.28061002492904663 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.19357341527938843, + "learning_rate": 1.844822231918663e-06, + "loss": 0.1765, + "step": 24355, + "teacher_loss": 0.17463155090808868 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.34363871812820435, + "learning_rate": 1.8437311182153793e-06, + "loss": 0.1966, + "step": 24356, + "teacher_loss": 0.1802481859922409 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.8654544353485107, + "learning_rate": 1.8426403061459019e-06, + "loss": 0.2611, + "step": 24357, + "teacher_loss": 0.1939910352230072 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.36112669110298157, + "learning_rate": 1.841549795735229e-06, + "loss": 0.1653, + "step": 24358, + "teacher_loss": 0.1434864103794098 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.507938027381897, + "learning_rate": 1.8404595870083706e-06, + "loss": 0.2654, + "step": 24359, + "teacher_loss": 0.23849962651729584 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.3904286026954651, + "learning_rate": 1.8393696799903204e-06, + "loss": 0.2257, + "step": 24360, + "teacher_loss": 0.20734341442584991 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.9596391916275024, + "learning_rate": 1.8382800747060646e-06, + "loss": 0.3869, + "step": 24361, + "teacher_loss": 0.3232209384441376 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.485186904668808, + "learning_rate": 1.8371907711805851e-06, + "loss": 0.2027, + "step": 24362, + "teacher_loss": 0.17126059532165527 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.40507572889328003, + "learning_rate": 1.8361017694388588e-06, + "loss": 0.24, + "step": 24363, + "teacher_loss": 0.22166912257671356 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.18776948750019073, + "learning_rate": 1.8350130695058537e-06, + "loss": 0.1498, + "step": 24364, + "teacher_loss": 0.14555975794792175 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.3984917104244232, + "learning_rate": 1.8339246714065232e-06, + "loss": 0.1508, + "step": 24365, + "teacher_loss": 0.12329752743244171 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.43079084157943726, + "learning_rate": 1.8328365751658277e-06, + "loss": 0.199, + "step": 24366, + "teacher_loss": 0.17329490184783936 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.4045030474662781, + "learning_rate": 1.8317487808087152e-06, + "loss": 0.2245, + "step": 24367, + "teacher_loss": 0.2044980525970459 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.46897411346435547, + "learning_rate": 1.8306612883601193e-06, + "loss": 0.1912, + "step": 24368, + "teacher_loss": 0.16037797927856445 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.6358522176742554, + "learning_rate": 1.8295740978449748e-06, + "loss": 0.2446, + "step": 24369, + "teacher_loss": 0.20110619068145752 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.26492926478385925, + "learning_rate": 1.8284872092882138e-06, + "loss": 0.1618, + "step": 24370, + "teacher_loss": 0.15029895305633545 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.8276483416557312, + "learning_rate": 1.8274006227147478e-06, + "loss": 0.2634, + "step": 24371, + "teacher_loss": 0.20074006915092468 + }, + { + "compression_loss": 0.0, + "epoch": 4.4, + "label_loss": 0.6769616007804871, + "learning_rate": 1.8263143381494917e-06, + "loss": 0.2713, + "step": 24372, + "teacher_loss": 0.22626271843910217 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.3706105351448059, + "learning_rate": 1.8252283556173544e-06, + "loss": 0.1698, + "step": 24373, + "teacher_loss": 0.147472083568573 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.5602754950523376, + "learning_rate": 1.8241426751432273e-06, + "loss": 0.2023, + "step": 24374, + "teacher_loss": 0.162471741437912 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.3811646103858948, + "learning_rate": 1.8230572967520071e-06, + "loss": 0.2002, + "step": 24375, + "teacher_loss": 0.18010881543159485 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.3324650824069977, + "learning_rate": 1.8219722204685775e-06, + "loss": 0.2054, + "step": 24376, + "teacher_loss": 0.19125190377235413 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.265918105840683, + "learning_rate": 1.8208874463178133e-06, + "loss": 0.2949, + "step": 24377, + "teacher_loss": 0.29807722568511963 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.42196860909461975, + "learning_rate": 1.8198029743245897e-06, + "loss": 0.1835, + "step": 24378, + "teacher_loss": 0.15700052678585052 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.7597277760505676, + "learning_rate": 1.8187188045137637e-06, + "loss": 0.1991, + "step": 24379, + "teacher_loss": 0.1368383765220642 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.4966760277748108, + "learning_rate": 1.817634936910197e-06, + "loss": 0.2321, + "step": 24380, + "teacher_loss": 0.20269182324409485 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.6978597640991211, + "learning_rate": 1.8165513715387411e-06, + "loss": 0.247, + "step": 24381, + "teacher_loss": 0.1969597041606903 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.529492974281311, + "learning_rate": 1.815468108424233e-06, + "loss": 0.2378, + "step": 24382, + "teacher_loss": 0.20543350279331207 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.6435953378677368, + "learning_rate": 1.814385147591513e-06, + "loss": 0.2446, + "step": 24383, + "teacher_loss": 0.20031176507472992 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.4451681673526764, + "learning_rate": 1.813302489065411e-06, + "loss": 0.2366, + "step": 24384, + "teacher_loss": 0.21341761946678162 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.284879207611084, + "learning_rate": 1.812220132870744e-06, + "loss": 0.1767, + "step": 24385, + "teacher_loss": 0.16470956802368164 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.9364553093910217, + "learning_rate": 1.8111380790323334e-06, + "loss": 0.2554, + "step": 24386, + "teacher_loss": 0.17977027595043182 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.4164993464946747, + "learning_rate": 1.8100563275749832e-06, + "loss": 0.2547, + "step": 24387, + "teacher_loss": 0.2367614507675171 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.4270293712615967, + "learning_rate": 1.8089748785234965e-06, + "loss": 0.1882, + "step": 24388, + "teacher_loss": 0.16168951988220215 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.82603520154953, + "learning_rate": 1.8078937319026655e-06, + "loss": 0.2949, + "step": 24389, + "teacher_loss": 0.23584884405136108 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.32706597447395325, + "learning_rate": 1.8068128877372785e-06, + "loss": 0.1472, + "step": 24390, + "teacher_loss": 0.12715966999530792 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.16757752001285553, + "learning_rate": 1.8057323460521208e-06, + "loss": 0.1327, + "step": 24391, + "teacher_loss": 0.12887245416641235 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.4521830081939697, + "learning_rate": 1.8046521068719573e-06, + "loss": 0.2035, + "step": 24392, + "teacher_loss": 0.17590278387069702 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.439098596572876, + "learning_rate": 1.8035721702215601e-06, + "loss": 0.1877, + "step": 24393, + "teacher_loss": 0.1597745716571808 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.165091872215271, + "learning_rate": 1.8024925361256911e-06, + "loss": 0.1615, + "step": 24394, + "teacher_loss": 0.16106855869293213 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.19946181774139404, + "learning_rate": 1.801413204609097e-06, + "loss": 0.1837, + "step": 24395, + "teacher_loss": 0.1819675713777542 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.2500089108943939, + "learning_rate": 1.8003341756965263e-06, + "loss": 0.1627, + "step": 24396, + "teacher_loss": 0.15297794342041016 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.19036871194839478, + "learning_rate": 1.7992554494127229e-06, + "loss": 0.1838, + "step": 24397, + "teacher_loss": 0.18302863836288452 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.43877318501472473, + "learning_rate": 1.7981770257824098e-06, + "loss": 0.2703, + "step": 24398, + "teacher_loss": 0.25158458948135376 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.4876255989074707, + "learning_rate": 1.797098904830316e-06, + "loss": 0.286, + "step": 24399, + "teacher_loss": 0.2636134624481201 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.7392245531082153, + "learning_rate": 1.7960210865811634e-06, + "loss": 0.2335, + "step": 24400, + "teacher_loss": 0.17726773023605347 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.6270226836204529, + "learning_rate": 1.79494357105966e-06, + "loss": 0.2603, + "step": 24401, + "teacher_loss": 0.21956589818000793 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.17647115886211395, + "learning_rate": 1.7938663582905035e-06, + "loss": 0.1622, + "step": 24402, + "teacher_loss": 0.16065333783626556 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.1639460325241089, + "learning_rate": 1.7927894482984036e-06, + "loss": 0.1359, + "step": 24403, + "teacher_loss": 0.13276860117912292 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.5401804447174072, + "learning_rate": 1.7917128411080442e-06, + "loss": 0.2255, + "step": 24404, + "teacher_loss": 0.19049134850502014 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.2855954170227051, + "learning_rate": 1.7906365367441068e-06, + "loss": 0.2362, + "step": 24405, + "teacher_loss": 0.23068645596504211 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.484529048204422, + "learning_rate": 1.7895605352312689e-06, + "loss": 0.2447, + "step": 24406, + "teacher_loss": 0.21803578734397888 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.6127817630767822, + "learning_rate": 1.7884848365942035e-06, + "loss": 0.2157, + "step": 24407, + "teacher_loss": 0.17159083485603333 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.32247716188430786, + "learning_rate": 1.787409440857568e-06, + "loss": 0.1888, + "step": 24408, + "teacher_loss": 0.17389345169067383 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.43364787101745605, + "learning_rate": 1.7863343480460208e-06, + "loss": 0.2254, + "step": 24409, + "teacher_loss": 0.2022097259759903 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.4437745213508606, + "learning_rate": 1.7852595581842141e-06, + "loss": 0.262, + "step": 24410, + "teacher_loss": 0.24182236194610596 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.19968535006046295, + "learning_rate": 1.7841850712967844e-06, + "loss": 0.1718, + "step": 24411, + "teacher_loss": 0.168709397315979 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.1655711680650711, + "learning_rate": 1.7831108874083623e-06, + "loss": 0.184, + "step": 24412, + "teacher_loss": 0.1860230416059494 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.24983613193035126, + "learning_rate": 1.782037006543588e-06, + "loss": 0.1909, + "step": 24413, + "teacher_loss": 0.18433141708374023 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.5995436906814575, + "learning_rate": 1.7809634287270754e-06, + "loss": 0.207, + "step": 24414, + "teacher_loss": 0.16339261829853058 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.29286009073257446, + "learning_rate": 1.7798901539834362e-06, + "loss": 0.16, + "step": 24415, + "teacher_loss": 0.14525073766708374 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.6715531349182129, + "learning_rate": 1.7788171823372789e-06, + "loss": 0.2091, + "step": 24416, + "teacher_loss": 0.15775421261787415 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 1.046951413154602, + "learning_rate": 1.7777445138132075e-06, + "loss": 0.2772, + "step": 24417, + "teacher_loss": 0.19165468215942383 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.32450950145721436, + "learning_rate": 1.7766721484358089e-06, + "loss": 0.1957, + "step": 24418, + "teacher_loss": 0.18141093850135803 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.7267633080482483, + "learning_rate": 1.7756000862296735e-06, + "loss": 0.3177, + "step": 24419, + "teacher_loss": 0.27227070927619934 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.7510654926300049, + "learning_rate": 1.7745283272193814e-06, + "loss": 0.3022, + "step": 24420, + "teacher_loss": 0.25235676765441895 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.6541892886161804, + "learning_rate": 1.7734568714294997e-06, + "loss": 0.2812, + "step": 24421, + "teacher_loss": 0.23978833854198456 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.2009134441614151, + "learning_rate": 1.7723857188845972e-06, + "loss": 0.16, + "step": 24422, + "teacher_loss": 0.15540926158428192 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.0928993970155716, + "learning_rate": 1.771314869609234e-06, + "loss": 0.1854, + "step": 24423, + "teacher_loss": 0.19564759731292725 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.2945198714733124, + "learning_rate": 1.7702443236279576e-06, + "loss": 0.1712, + "step": 24424, + "teacher_loss": 0.15746445953845978 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.1602174937725067, + "learning_rate": 1.7691740809653128e-06, + "loss": 0.1225, + "step": 24425, + "teacher_loss": 0.11825625598430634 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.603344202041626, + "learning_rate": 1.7681041416458405e-06, + "loss": 0.1997, + "step": 24426, + "teacher_loss": 0.15483233332633972 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.16134929656982422, + "learning_rate": 1.7670345056940708e-06, + "loss": 0.2615, + "step": 24427, + "teacher_loss": 0.2726028561592102 + }, + { + "compression_loss": 0.0, + "epoch": 4.41, + "label_loss": 0.4265596568584442, + "learning_rate": 1.7659651731345206e-06, + "loss": 0.2343, + "step": 24428, + "teacher_loss": 0.21288880705833435 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.5848217010498047, + "learning_rate": 1.7648961439917122e-06, + "loss": 0.346, + "step": 24429, + "teacher_loss": 0.31950464844703674 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.6196691393852234, + "learning_rate": 1.7638274182901576e-06, + "loss": 0.2176, + "step": 24430, + "teacher_loss": 0.17288947105407715 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.3572612404823303, + "learning_rate": 1.7627589960543522e-06, + "loss": 0.2065, + "step": 24431, + "teacher_loss": 0.18978887796401978 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.22268274426460266, + "learning_rate": 1.7616908773087965e-06, + "loss": 0.2262, + "step": 24432, + "teacher_loss": 0.22663632035255432 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.43206608295440674, + "learning_rate": 1.7606230620779822e-06, + "loss": 0.2033, + "step": 24433, + "teacher_loss": 0.17787495255470276 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.4927389621734619, + "learning_rate": 1.7595555503863836e-06, + "loss": 0.3245, + "step": 24434, + "teacher_loss": 0.3058363199234009 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.29906463623046875, + "learning_rate": 1.7584883422584791e-06, + "loss": 0.1785, + "step": 24435, + "teacher_loss": 0.16505998373031616 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.48722803592681885, + "learning_rate": 1.7574214377187393e-06, + "loss": 0.2289, + "step": 24436, + "teacher_loss": 0.20017579197883606 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.3993653655052185, + "learning_rate": 1.756354836791621e-06, + "loss": 0.1948, + "step": 24437, + "teacher_loss": 0.17206673324108124 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.28124380111694336, + "learning_rate": 1.7552885395015816e-06, + "loss": 0.1564, + "step": 24438, + "teacher_loss": 0.1425042599439621 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.35560667514801025, + "learning_rate": 1.7542225458730631e-06, + "loss": 0.2529, + "step": 24439, + "teacher_loss": 0.2414434552192688 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.2461598962545395, + "learning_rate": 1.7531568559305094e-06, + "loss": 0.2827, + "step": 24440, + "teacher_loss": 0.28670698404312134 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.10769784450531006, + "learning_rate": 1.7520914696983558e-06, + "loss": 0.0994, + "step": 24441, + "teacher_loss": 0.09850557893514633 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.6627588272094727, + "learning_rate": 1.7510263872010229e-06, + "loss": 0.2882, + "step": 24442, + "teacher_loss": 0.2466033697128296 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.5780213475227356, + "learning_rate": 1.7499616084629328e-06, + "loss": 0.2319, + "step": 24443, + "teacher_loss": 0.19340112805366516 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.5630593299865723, + "learning_rate": 1.748897133508499e-06, + "loss": 0.2938, + "step": 24444, + "teacher_loss": 0.2639217972755432 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.20912209153175354, + "learning_rate": 1.7478329623621226e-06, + "loss": 0.1349, + "step": 24445, + "teacher_loss": 0.12668466567993164 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.12842725217342377, + "learning_rate": 1.7467690950482052e-06, + "loss": 0.1437, + "step": 24446, + "teacher_loss": 0.14537307620048523 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.6309764385223389, + "learning_rate": 1.7457055315911391e-06, + "loss": 0.2254, + "step": 24447, + "teacher_loss": 0.18037429451942444 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.41530001163482666, + "learning_rate": 1.7446422720153032e-06, + "loss": 0.2162, + "step": 24448, + "teacher_loss": 0.19403663277626038 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.2699463665485382, + "learning_rate": 1.743579316345083e-06, + "loss": 0.159, + "step": 24449, + "teacher_loss": 0.14670351147651672 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.3707202076911926, + "learning_rate": 1.7425166646048407e-06, + "loss": 0.1722, + "step": 24450, + "teacher_loss": 0.15016481280326843 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.3348059058189392, + "learning_rate": 1.741454316818945e-06, + "loss": 0.1985, + "step": 24451, + "teacher_loss": 0.18334174156188965 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.6357457041740417, + "learning_rate": 1.74039227301175e-06, + "loss": 0.2315, + "step": 24452, + "teacher_loss": 0.1866014152765274 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.8041625022888184, + "learning_rate": 1.7393305332076043e-06, + "loss": 0.2363, + "step": 24453, + "teacher_loss": 0.17319905757904053 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.2968295216560364, + "learning_rate": 1.7382690974308551e-06, + "loss": 0.2248, + "step": 24454, + "teacher_loss": 0.2168281376361847 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.28981465101242065, + "learning_rate": 1.7372079657058316e-06, + "loss": 0.2474, + "step": 24455, + "teacher_loss": 0.24271957576274872 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.45003271102905273, + "learning_rate": 1.7361471380568654e-06, + "loss": 0.2608, + "step": 24456, + "teacher_loss": 0.23978403210639954 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.6797051429748535, + "learning_rate": 1.7350866145082827e-06, + "loss": 0.2075, + "step": 24457, + "teacher_loss": 0.1550377607345581 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.3361632227897644, + "learning_rate": 1.7340263950843888e-06, + "loss": 0.1666, + "step": 24458, + "teacher_loss": 0.14774832129478455 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.2442660927772522, + "learning_rate": 1.7329664798094973e-06, + "loss": 0.2122, + "step": 24459, + "teacher_loss": 0.20859801769256592 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.23953089118003845, + "learning_rate": 1.731906868707911e-06, + "loss": 0.1732, + "step": 24460, + "teacher_loss": 0.16579227149486542 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.21019737422466278, + "learning_rate": 1.7308475618039199e-06, + "loss": 0.1647, + "step": 24461, + "teacher_loss": 0.15966854989528656 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.4295654594898224, + "learning_rate": 1.7297885591218049e-06, + "loss": 0.4592, + "step": 24462, + "teacher_loss": 0.46250009536743164 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.42035824060440063, + "learning_rate": 1.7287298606858582e-06, + "loss": 0.2162, + "step": 24463, + "teacher_loss": 0.19350774586200714 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.36720970273017883, + "learning_rate": 1.727671466520347e-06, + "loss": 0.1841, + "step": 24464, + "teacher_loss": 0.1637439876794815 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.38488417863845825, + "learning_rate": 1.7266133766495317e-06, + "loss": 0.1771, + "step": 24465, + "teacher_loss": 0.15397042036056519 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.5386950373649597, + "learning_rate": 1.7255555910976784e-06, + "loss": 0.2476, + "step": 24466, + "teacher_loss": 0.21529436111450195 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.3712984025478363, + "learning_rate": 1.724498109889039e-06, + "loss": 0.3113, + "step": 24467, + "teacher_loss": 0.3046547770500183 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.08986647427082062, + "learning_rate": 1.7234409330478523e-06, + "loss": 0.18, + "step": 24468, + "teacher_loss": 0.19004280865192413 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.49769020080566406, + "learning_rate": 1.7223840605983593e-06, + "loss": 0.2349, + "step": 24469, + "teacher_loss": 0.20565104484558105 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.34266024827957153, + "learning_rate": 1.7213274925647954e-06, + "loss": 0.2121, + "step": 24470, + "teacher_loss": 0.19759118556976318 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.6303396224975586, + "learning_rate": 1.7202712289713813e-06, + "loss": 0.2157, + "step": 24471, + "teacher_loss": 0.16958779096603394 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.3793298006057739, + "learning_rate": 1.7192152698423258e-06, + "loss": 0.3377, + "step": 24472, + "teacher_loss": 0.3330966532230377 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.4321761727333069, + "learning_rate": 1.718159615201853e-06, + "loss": 0.256, + "step": 24473, + "teacher_loss": 0.23644474148750305 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.6198815107345581, + "learning_rate": 1.7171042650741585e-06, + "loss": 0.1738, + "step": 24474, + "teacher_loss": 0.12429051101207733 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.5762876272201538, + "learning_rate": 1.7160492194834331e-06, + "loss": 0.1981, + "step": 24475, + "teacher_loss": 0.1560766100883484 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.22794653475284576, + "learning_rate": 1.714994478453879e-06, + "loss": 0.1751, + "step": 24476, + "teacher_loss": 0.16918307542800903 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.3369901478290558, + "learning_rate": 1.7139400420096702e-06, + "loss": 0.2215, + "step": 24477, + "teacher_loss": 0.20867237448692322 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.3909589946269989, + "learning_rate": 1.7128859101749789e-06, + "loss": 0.1945, + "step": 24478, + "teacher_loss": 0.1727023720741272 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.31496286392211914, + "learning_rate": 1.7118320829739775e-06, + "loss": 0.1848, + "step": 24479, + "teacher_loss": 0.17036299407482147 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.5217673778533936, + "learning_rate": 1.7107785604308301e-06, + "loss": 0.1998, + "step": 24480, + "teacher_loss": 0.16398631036281586 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.7781469225883484, + "learning_rate": 1.7097253425696823e-06, + "loss": 0.2862, + "step": 24481, + "teacher_loss": 0.2315066158771515 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.4367533028125763, + "learning_rate": 1.7086724294146884e-06, + "loss": 0.235, + "step": 24482, + "teacher_loss": 0.2125321328639984 + }, + { + "compression_loss": 0.0, + "epoch": 4.42, + "label_loss": 0.5458082556724548, + "learning_rate": 1.7076198209899885e-06, + "loss": 0.354, + "step": 24483, + "teacher_loss": 0.3326645493507385 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.26402565836906433, + "learning_rate": 1.7065675173197087e-06, + "loss": 0.1423, + "step": 24484, + "teacher_loss": 0.1287655234336853 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.8025052547454834, + "learning_rate": 1.7055155184279814e-06, + "loss": 0.4451, + "step": 24485, + "teacher_loss": 0.40533286333084106 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.5758934020996094, + "learning_rate": 1.7044638243389271e-06, + "loss": 0.2573, + "step": 24486, + "teacher_loss": 0.22188061475753784 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.21546858549118042, + "learning_rate": 1.7034124350766533e-06, + "loss": 0.1579, + "step": 24487, + "teacher_loss": 0.1515224277973175 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.16158457100391388, + "learning_rate": 1.7023613506652692e-06, + "loss": 0.2592, + "step": 24488, + "teacher_loss": 0.2700707018375397 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.37913060188293457, + "learning_rate": 1.7013105711288668e-06, + "loss": 0.1897, + "step": 24489, + "teacher_loss": 0.16861286759376526 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.44704902172088623, + "learning_rate": 1.7002600964915471e-06, + "loss": 0.2827, + "step": 24490, + "teacher_loss": 0.2643897235393524 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.6619666814804077, + "learning_rate": 1.6992099267773842e-06, + "loss": 0.2676, + "step": 24491, + "teacher_loss": 0.22376984357833862 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.31577426195144653, + "learning_rate": 1.6981600620104586e-06, + "loss": 0.1879, + "step": 24492, + "teacher_loss": 0.1736985743045807 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.7790653705596924, + "learning_rate": 1.6971105022148465e-06, + "loss": 0.2735, + "step": 24493, + "teacher_loss": 0.21737366914749146 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.2521287500858307, + "learning_rate": 1.696061247414603e-06, + "loss": 0.1909, + "step": 24494, + "teacher_loss": 0.1840868443250656 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.6298946142196655, + "learning_rate": 1.6950122976337879e-06, + "loss": 0.2244, + "step": 24495, + "teacher_loss": 0.17929315567016602 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.40682095289230347, + "learning_rate": 1.693963652896453e-06, + "loss": 0.2116, + "step": 24496, + "teacher_loss": 0.1899423599243164 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.4832611382007599, + "learning_rate": 1.6929153132266363e-06, + "loss": 0.1945, + "step": 24497, + "teacher_loss": 0.1624184548854828 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.5534741878509521, + "learning_rate": 1.691867278648373e-06, + "loss": 0.2569, + "step": 24498, + "teacher_loss": 0.2239881455898285 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.1723393201828003, + "learning_rate": 1.6908195491856976e-06, + "loss": 0.1922, + "step": 24499, + "teacher_loss": 0.1944473683834076 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.3675430119037628, + "learning_rate": 1.6897721248626224e-06, + "loss": 0.3125, + "step": 24500, + "teacher_loss": 0.30634430050849915 + }, + { + "epoch": 4.43, + "eval_exact_match": 80.42573320719016, + "eval_f1": 87.68799005551737, + "step": 24500 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.45333170890808105, + "learning_rate": 1.6887250057031701e-06, + "loss": 0.2882, + "step": 24501, + "teacher_loss": 0.26980215311050415 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.728473424911499, + "learning_rate": 1.687678191731341e-06, + "loss": 0.2882, + "step": 24502, + "teacher_loss": 0.2393219918012619 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.3277839422225952, + "learning_rate": 1.6866316829711381e-06, + "loss": 0.1931, + "step": 24503, + "teacher_loss": 0.17818287014961243 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.38891106843948364, + "learning_rate": 1.6855854794465602e-06, + "loss": 0.2188, + "step": 24504, + "teacher_loss": 0.19989748299121857 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.5908646583557129, + "learning_rate": 1.6845395811815834e-06, + "loss": 0.2213, + "step": 24505, + "teacher_loss": 0.18020027875900269 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.6886515617370605, + "learning_rate": 1.6834939882001931e-06, + "loss": 0.2807, + "step": 24506, + "teacher_loss": 0.23535841703414917 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.16791735589504242, + "learning_rate": 1.6824487005263639e-06, + "loss": 0.1744, + "step": 24507, + "teacher_loss": 0.17516668140888214 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.34179258346557617, + "learning_rate": 1.6814037181840546e-06, + "loss": 0.1485, + "step": 24508, + "teacher_loss": 0.12703868746757507 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.41665416955947876, + "learning_rate": 1.6803590411972263e-06, + "loss": 0.1735, + "step": 24509, + "teacher_loss": 0.14642837643623352 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.6931857466697693, + "learning_rate": 1.6793146695898332e-06, + "loss": 0.2836, + "step": 24510, + "teacher_loss": 0.2381274700164795 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.7405045032501221, + "learning_rate": 1.6782706033858191e-06, + "loss": 0.2111, + "step": 24511, + "teacher_loss": 0.15233029425144196 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.6076223850250244, + "learning_rate": 1.6772268426091153e-06, + "loss": 0.2549, + "step": 24512, + "teacher_loss": 0.21572019159793854 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.44470375776290894, + "learning_rate": 1.6761833872836575e-06, + "loss": 0.291, + "step": 24513, + "teacher_loss": 0.2739550769329071 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.36093905568122864, + "learning_rate": 1.6751402374333696e-06, + "loss": 0.195, + "step": 24514, + "teacher_loss": 0.17654427886009216 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.21240416169166565, + "learning_rate": 1.674097393082163e-06, + "loss": 0.1731, + "step": 24515, + "teacher_loss": 0.16877998411655426 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.2884906530380249, + "learning_rate": 1.6730548542539498e-06, + "loss": 0.2116, + "step": 24516, + "teacher_loss": 0.2030259370803833 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.5228714942932129, + "learning_rate": 1.6720126209726362e-06, + "loss": 0.2415, + "step": 24517, + "teacher_loss": 0.2101929634809494 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.658111572265625, + "learning_rate": 1.670970693262111e-06, + "loss": 0.1985, + "step": 24518, + "teacher_loss": 0.14747270941734314 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.44723668694496155, + "learning_rate": 1.6699290711462656e-06, + "loss": 0.2366, + "step": 24519, + "teacher_loss": 0.21324560046195984 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.3051148056983948, + "learning_rate": 1.6688877546489823e-06, + "loss": 0.177, + "step": 24520, + "teacher_loss": 0.16281157732009888 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.3116874694824219, + "learning_rate": 1.667846743794132e-06, + "loss": 0.1577, + "step": 24521, + "teacher_loss": 0.1405673623085022 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.37893199920654297, + "learning_rate": 1.6668060386055855e-06, + "loss": 0.2268, + "step": 24522, + "teacher_loss": 0.2099481225013733 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.2683839499950409, + "learning_rate": 1.6657656391072024e-06, + "loss": 0.1941, + "step": 24523, + "teacher_loss": 0.18583469092845917 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.32590773701667786, + "learning_rate": 1.6647255453228366e-06, + "loss": 0.2221, + "step": 24524, + "teacher_loss": 0.21059784293174744 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.3735974133014679, + "learning_rate": 1.6636857572763258e-06, + "loss": 0.221, + "step": 24525, + "teacher_loss": 0.20400184392929077 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.5440957546234131, + "learning_rate": 1.6626462749915227e-06, + "loss": 0.2715, + "step": 24526, + "teacher_loss": 0.24119020998477936 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.4373500645160675, + "learning_rate": 1.6616070984922515e-06, + "loss": 0.2301, + "step": 24527, + "teacher_loss": 0.2070498764514923 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.2646661400794983, + "learning_rate": 1.6605682278023383e-06, + "loss": 0.1948, + "step": 24528, + "teacher_loss": 0.18702928721904755 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.5522036552429199, + "learning_rate": 1.6595296629456002e-06, + "loss": 0.2706, + "step": 24529, + "teacher_loss": 0.23935039341449738 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.37035250663757324, + "learning_rate": 1.6584914039458537e-06, + "loss": 0.1956, + "step": 24530, + "teacher_loss": 0.17616891860961914 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.2043423056602478, + "learning_rate": 1.6574534508268978e-06, + "loss": 0.2439, + "step": 24531, + "teacher_loss": 0.24826355278491974 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.8247231245040894, + "learning_rate": 1.6564158036125287e-06, + "loss": 0.3939, + "step": 24532, + "teacher_loss": 0.3459968864917755 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.16633634269237518, + "learning_rate": 1.655378462326544e-06, + "loss": 0.2034, + "step": 24533, + "teacher_loss": 0.20750859379768372 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.44069015979766846, + "learning_rate": 1.654341426992721e-06, + "loss": 0.2179, + "step": 24534, + "teacher_loss": 0.19311542809009552 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.4300878047943115, + "learning_rate": 1.653304697634831e-06, + "loss": 0.1783, + "step": 24535, + "teacher_loss": 0.1502757966518402 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.4605398178100586, + "learning_rate": 1.6522682742766549e-06, + "loss": 0.2422, + "step": 24536, + "teacher_loss": 0.21799196302890778 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.24333976209163666, + "learning_rate": 1.6512321569419486e-06, + "loss": 0.1571, + "step": 24537, + "teacher_loss": 0.14752452075481415 + }, + { + "compression_loss": 0.0, + "epoch": 4.43, + "label_loss": 0.3269619941711426, + "learning_rate": 1.650196345654465e-06, + "loss": 0.191, + "step": 24538, + "teacher_loss": 0.17590953409671783 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.5551846027374268, + "learning_rate": 1.6491608404379532e-06, + "loss": 0.2386, + "step": 24539, + "teacher_loss": 0.20343518257141113 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.3133026361465454, + "learning_rate": 1.6481256413161594e-06, + "loss": 0.2031, + "step": 24540, + "teacher_loss": 0.19086697697639465 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.3053411841392517, + "learning_rate": 1.6470907483128095e-06, + "loss": 0.1836, + "step": 24541, + "teacher_loss": 0.17003867030143738 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.34737879037857056, + "learning_rate": 1.6460561614516362e-06, + "loss": 0.196, + "step": 24542, + "teacher_loss": 0.1791771799325943 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.41136208176612854, + "learning_rate": 1.6450218807563605e-06, + "loss": 0.2294, + "step": 24543, + "teacher_loss": 0.20915460586547852 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.24792975187301636, + "learning_rate": 1.6439879062506885e-06, + "loss": 0.203, + "step": 24544, + "teacher_loss": 0.19802913069725037 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.8182623386383057, + "learning_rate": 1.6429542379583313e-06, + "loss": 0.3206, + "step": 24545, + "teacher_loss": 0.26530399918556213 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.3134274482727051, + "learning_rate": 1.6419208759029898e-06, + "loss": 0.1618, + "step": 24546, + "teacher_loss": 0.14490006864070892 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.2610355019569397, + "learning_rate": 1.64088782010835e-06, + "loss": 0.1658, + "step": 24547, + "teacher_loss": 0.15524545311927795 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.48949819803237915, + "learning_rate": 1.6398550705980997e-06, + "loss": 0.2324, + "step": 24548, + "teacher_loss": 0.20381684601306915 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.24826902151107788, + "learning_rate": 1.63882262739592e-06, + "loss": 0.2574, + "step": 24549, + "teacher_loss": 0.2584322392940521 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.36715832352638245, + "learning_rate": 1.6377904905254753e-06, + "loss": 0.1971, + "step": 24550, + "teacher_loss": 0.17826001346111298 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.3578345775604248, + "learning_rate": 1.6367586600104366e-06, + "loss": 0.1644, + "step": 24551, + "teacher_loss": 0.14294584095478058 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.2464931160211563, + "learning_rate": 1.6357271358744534e-06, + "loss": 0.1254, + "step": 24552, + "teacher_loss": 0.11192546039819717 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.34004467725753784, + "learning_rate": 1.634695918141182e-06, + "loss": 0.1652, + "step": 24553, + "teacher_loss": 0.14578023552894592 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.5536391735076904, + "learning_rate": 1.6336650068342578e-06, + "loss": 0.2831, + "step": 24554, + "teacher_loss": 0.252998024225235 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.2095588594675064, + "learning_rate": 1.632634401977321e-06, + "loss": 0.1703, + "step": 24555, + "teacher_loss": 0.1659424901008606 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.3328903317451477, + "learning_rate": 1.6316041035940038e-06, + "loss": 0.1701, + "step": 24556, + "teacher_loss": 0.15202638506889343 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.48475855588912964, + "learning_rate": 1.6305741117079193e-06, + "loss": 0.1888, + "step": 24557, + "teacher_loss": 0.15589985251426697 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.40051794052124023, + "learning_rate": 1.6295444263426884e-06, + "loss": 0.1961, + "step": 24558, + "teacher_loss": 0.17336460947990417 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.3068946897983551, + "learning_rate": 1.628515047521919e-06, + "loss": 0.1711, + "step": 24559, + "teacher_loss": 0.15605273842811584 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.9022079706192017, + "learning_rate": 1.6274859752692073e-06, + "loss": 0.331, + "step": 24560, + "teacher_loss": 0.2675066292285919 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.3375336229801178, + "learning_rate": 1.6264572096081526e-06, + "loss": 0.236, + "step": 24561, + "teacher_loss": 0.2247694432735443 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.13949500024318695, + "learning_rate": 1.625428750562336e-06, + "loss": 0.1651, + "step": 24562, + "teacher_loss": 0.1679687201976776 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.24447891116142273, + "learning_rate": 1.6244005981553373e-06, + "loss": 0.1817, + "step": 24563, + "teacher_loss": 0.17474150657653809 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.5347205400466919, + "learning_rate": 1.623372752410734e-06, + "loss": 0.2485, + "step": 24564, + "teacher_loss": 0.2167244851589203 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.158926323056221, + "learning_rate": 1.6223452133520855e-06, + "loss": 0.1905, + "step": 24565, + "teacher_loss": 0.19403883814811707 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.6647493243217468, + "learning_rate": 1.6213179810029533e-06, + "loss": 0.2619, + "step": 24566, + "teacher_loss": 0.21714705228805542 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.17486582696437836, + "learning_rate": 1.6202910553868916e-06, + "loss": 0.1344, + "step": 24567, + "teacher_loss": 0.12994059920310974 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.08730772137641907, + "learning_rate": 1.6192644365274384e-06, + "loss": 0.1532, + "step": 24568, + "teacher_loss": 0.16054219007492065 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.3011443614959717, + "learning_rate": 1.6182381244481349e-06, + "loss": 0.1786, + "step": 24569, + "teacher_loss": 0.165004163980484 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.6814702153205872, + "learning_rate": 1.617212119172512e-06, + "loss": 0.2327, + "step": 24570, + "teacher_loss": 0.18287155032157898 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.41953521966934204, + "learning_rate": 1.6161864207240895e-06, + "loss": 0.1774, + "step": 24571, + "teacher_loss": 0.15053123235702515 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.4130859971046448, + "learning_rate": 1.6151610291263885e-06, + "loss": 0.2179, + "step": 24572, + "teacher_loss": 0.1962481141090393 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 1.1413060426712036, + "learning_rate": 1.6141359444029118e-06, + "loss": 0.3217, + "step": 24573, + "teacher_loss": 0.23064029216766357 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.734916090965271, + "learning_rate": 1.6131111665771692e-06, + "loss": 0.3244, + "step": 24574, + "teacher_loss": 0.2787725329399109 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.2855764627456665, + "learning_rate": 1.6120866956726466e-06, + "loss": 0.223, + "step": 24575, + "teacher_loss": 0.2160138189792633 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.24173466861248016, + "learning_rate": 1.6110625317128386e-06, + "loss": 0.2365, + "step": 24576, + "teacher_loss": 0.2359049916267395 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.20645572245121002, + "learning_rate": 1.6100386747212265e-06, + "loss": 0.1775, + "step": 24577, + "teacher_loss": 0.17428681254386902 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.39578214287757874, + "learning_rate": 1.6090151247212814e-06, + "loss": 0.1795, + "step": 24578, + "teacher_loss": 0.15550529956817627 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.2625391483306885, + "learning_rate": 1.6079918817364697e-06, + "loss": 0.1881, + "step": 24579, + "teacher_loss": 0.17986449599266052 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.5581576228141785, + "learning_rate": 1.6069689457902558e-06, + "loss": 0.2521, + "step": 24580, + "teacher_loss": 0.21814373135566711 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.6056469678878784, + "learning_rate": 1.6059463169060862e-06, + "loss": 0.2243, + "step": 24581, + "teacher_loss": 0.181891530752182 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.22236371040344238, + "learning_rate": 1.6049239951074118e-06, + "loss": 0.1915, + "step": 24582, + "teacher_loss": 0.18808165192604065 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.13871464133262634, + "learning_rate": 1.6039019804176709e-06, + "loss": 0.1744, + "step": 24583, + "teacher_loss": 0.17835819721221924 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.22152800858020782, + "learning_rate": 1.6028802728602943e-06, + "loss": 0.189, + "step": 24584, + "teacher_loss": 0.1853894591331482 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.38740813732147217, + "learning_rate": 1.601858872458702e-06, + "loss": 0.1991, + "step": 24585, + "teacher_loss": 0.17819377779960632 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.7993651628494263, + "learning_rate": 1.60083777923632e-06, + "loss": 0.324, + "step": 24586, + "teacher_loss": 0.27118727564811707 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.6790557503700256, + "learning_rate": 1.5998169932165563e-06, + "loss": 0.2862, + "step": 24587, + "teacher_loss": 0.24253737926483154 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.3495616018772125, + "learning_rate": 1.5987965144228089e-06, + "loss": 0.2117, + "step": 24588, + "teacher_loss": 0.19633352756500244 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.26742318272590637, + "learning_rate": 1.5977763428784792e-06, + "loss": 0.192, + "step": 24589, + "teacher_loss": 0.18366271257400513 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.1663876473903656, + "learning_rate": 1.5967564786069599e-06, + "loss": 0.1593, + "step": 24590, + "teacher_loss": 0.15848945081233978 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.15313363075256348, + "learning_rate": 1.5957369216316242e-06, + "loss": 0.1424, + "step": 24591, + "teacher_loss": 0.14121964573860168 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.33252301812171936, + "learning_rate": 1.594717671975855e-06, + "loss": 0.2011, + "step": 24592, + "teacher_loss": 0.18647587299346924 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.17173290252685547, + "learning_rate": 1.5936987296630202e-06, + "loss": 0.1887, + "step": 24593, + "teacher_loss": 0.19054588675498962 + }, + { + "compression_loss": 0.0, + "epoch": 4.44, + "label_loss": 0.0792551189661026, + "learning_rate": 1.592680094716481e-06, + "loss": 0.1636, + "step": 24594, + "teacher_loss": 0.17292210459709167 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.2631770372390747, + "learning_rate": 1.5916617671595823e-06, + "loss": 0.2519, + "step": 24595, + "teacher_loss": 0.25062841176986694 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.31205427646636963, + "learning_rate": 1.5906437470156854e-06, + "loss": 0.197, + "step": 24596, + "teacher_loss": 0.1841677725315094 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.37070631980895996, + "learning_rate": 1.5896260343081247e-06, + "loss": 0.2009, + "step": 24597, + "teacher_loss": 0.18208113312721252 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.7372523546218872, + "learning_rate": 1.5886086290602253e-06, + "loss": 0.3211, + "step": 24598, + "teacher_loss": 0.27482372522354126 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.3188885450363159, + "learning_rate": 1.5875915312953281e-06, + "loss": 0.182, + "step": 24599, + "teacher_loss": 0.16679808497428894 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.5853077173233032, + "learning_rate": 1.5865747410367449e-06, + "loss": 0.263, + "step": 24600, + "teacher_loss": 0.22717157006263733 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.276511013507843, + "learning_rate": 1.5855582583077833e-06, + "loss": 0.166, + "step": 24601, + "teacher_loss": 0.153678297996521 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.2685980200767517, + "learning_rate": 1.5845420831317514e-06, + "loss": 0.2098, + "step": 24602, + "teacher_loss": 0.2032998502254486 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.6641155481338501, + "learning_rate": 1.5835262155319524e-06, + "loss": 0.313, + "step": 24603, + "teacher_loss": 0.2739391624927521 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.3032028079032898, + "learning_rate": 1.5825106555316693e-06, + "loss": 0.1883, + "step": 24604, + "teacher_loss": 0.17552849650382996 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.26810744404792786, + "learning_rate": 1.5814954031541883e-06, + "loss": 0.1458, + "step": 24605, + "teacher_loss": 0.13221679627895355 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.703420877456665, + "learning_rate": 1.5804804584227912e-06, + "loss": 0.2463, + "step": 24606, + "teacher_loss": 0.19551962614059448 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.08948704600334167, + "learning_rate": 1.5794658213607387e-06, + "loss": 0.1073, + "step": 24607, + "teacher_loss": 0.10924255102872849 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.6370911002159119, + "learning_rate": 1.5784514919912995e-06, + "loss": 0.2512, + "step": 24608, + "teacher_loss": 0.20831122994422913 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.21382860839366913, + "learning_rate": 1.5774374703377314e-06, + "loss": 0.1413, + "step": 24609, + "teacher_loss": 0.13328108191490173 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.24738924205303192, + "learning_rate": 1.5764237564232743e-06, + "loss": 0.247, + "step": 24610, + "teacher_loss": 0.2469521015882492 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.4090577960014343, + "learning_rate": 1.5754103502711791e-06, + "loss": 0.2377, + "step": 24611, + "teacher_loss": 0.21862216293811798 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.420366108417511, + "learning_rate": 1.5743972519046712e-06, + "loss": 0.2265, + "step": 24612, + "teacher_loss": 0.20496167242527008 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.454008549451828, + "learning_rate": 1.5733844613469833e-06, + "loss": 0.3187, + "step": 24613, + "teacher_loss": 0.30361682176589966 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.14232882857322693, + "learning_rate": 1.572371978621337e-06, + "loss": 0.1243, + "step": 24614, + "teacher_loss": 0.12224339693784714 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.3221674859523773, + "learning_rate": 1.5713598037509403e-06, + "loss": 0.2208, + "step": 24615, + "teacher_loss": 0.20957759022712708 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.5490390062332153, + "learning_rate": 1.5703479367590045e-06, + "loss": 0.2602, + "step": 24616, + "teacher_loss": 0.22805500030517578 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.5837002396583557, + "learning_rate": 1.5693363776687248e-06, + "loss": 0.2261, + "step": 24617, + "teacher_loss": 0.1863297075033188 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.6253211498260498, + "learning_rate": 1.5683251265032938e-06, + "loss": 0.1997, + "step": 24618, + "teacher_loss": 0.15243801474571228 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.42183393239974976, + "learning_rate": 1.5673141832859e-06, + "loss": 0.2426, + "step": 24619, + "teacher_loss": 0.22263690829277039 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.822284996509552, + "learning_rate": 1.5663035480397164e-06, + "loss": 0.2775, + "step": 24620, + "teacher_loss": 0.2169930785894394 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.3234124183654785, + "learning_rate": 1.5652932207879162e-06, + "loss": 0.2255, + "step": 24621, + "teacher_loss": 0.21458397805690765 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.4226740002632141, + "learning_rate": 1.5642832015536656e-06, + "loss": 0.1919, + "step": 24622, + "teacher_loss": 0.16626834869384766 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.3952505588531494, + "learning_rate": 1.5632734903601164e-06, + "loss": 0.2201, + "step": 24623, + "teacher_loss": 0.20063111186027527 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.2535870373249054, + "learning_rate": 1.5622640872304234e-06, + "loss": 0.1601, + "step": 24624, + "teacher_loss": 0.14972186088562012 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.4565557837486267, + "learning_rate": 1.5612549921877228e-06, + "loss": 0.2402, + "step": 24625, + "teacher_loss": 0.21612709760665894 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.3729138970375061, + "learning_rate": 1.5602462052551548e-06, + "loss": 0.1652, + "step": 24626, + "teacher_loss": 0.14213019609451294 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.31533122062683105, + "learning_rate": 1.5592377264558489e-06, + "loss": 0.1678, + "step": 24627, + "teacher_loss": 0.15138697624206543 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.26353466510772705, + "learning_rate": 1.5582295558129218e-06, + "loss": 0.2184, + "step": 24628, + "teacher_loss": 0.21338677406311035 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.5461602210998535, + "learning_rate": 1.5572216933494914e-06, + "loss": 0.1904, + "step": 24629, + "teacher_loss": 0.15084102749824524 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.6247513890266418, + "learning_rate": 1.5562141390886663e-06, + "loss": 0.2491, + "step": 24630, + "teacher_loss": 0.20732802152633667 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.4554939866065979, + "learning_rate": 1.5552068930535412e-06, + "loss": 0.246, + "step": 24631, + "teacher_loss": 0.22275623679161072 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.3492724597454071, + "learning_rate": 1.5541999552672127e-06, + "loss": 0.19, + "step": 24632, + "teacher_loss": 0.1723240166902542 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.5178931951522827, + "learning_rate": 1.5531933257527686e-06, + "loss": 0.1915, + "step": 24633, + "teacher_loss": 0.15527847409248352 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.3822687268257141, + "learning_rate": 1.5521870045332875e-06, + "loss": 0.2464, + "step": 24634, + "teacher_loss": 0.2313191294670105 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.16244147717952728, + "learning_rate": 1.551180991631836e-06, + "loss": 0.2605, + "step": 24635, + "teacher_loss": 0.271393358707428 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.19074472784996033, + "learning_rate": 1.5501752870714837e-06, + "loss": 0.147, + "step": 24636, + "teacher_loss": 0.14218004047870636 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.2485916018486023, + "learning_rate": 1.5491698908752889e-06, + "loss": 0.2338, + "step": 24637, + "teacher_loss": 0.23218220472335815 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.6456116437911987, + "learning_rate": 1.548164803066298e-06, + "loss": 0.2829, + "step": 24638, + "teacher_loss": 0.24261735379695892 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.2742326855659485, + "learning_rate": 1.5471600236675582e-06, + "loss": 0.1709, + "step": 24639, + "teacher_loss": 0.15940365195274353 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.6404263973236084, + "learning_rate": 1.5461555527021088e-06, + "loss": 0.222, + "step": 24640, + "teacher_loss": 0.17556026577949524 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.48365387320518494, + "learning_rate": 1.5451513901929714e-06, + "loss": 0.2302, + "step": 24641, + "teacher_loss": 0.20198820531368256 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.735905647277832, + "learning_rate": 1.5441475361631746e-06, + "loss": 0.2455, + "step": 24642, + "teacher_loss": 0.19096827507019043 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.3681783080101013, + "learning_rate": 1.5431439906357332e-06, + "loss": 0.2491, + "step": 24643, + "teacher_loss": 0.2358437478542328 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.05697283148765564, + "learning_rate": 1.5421407536336534e-06, + "loss": 0.1422, + "step": 24644, + "teacher_loss": 0.15165507793426514 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.40386247634887695, + "learning_rate": 1.5411378251799357e-06, + "loss": 0.2246, + "step": 24645, + "teacher_loss": 0.2046658843755722 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.3578646183013916, + "learning_rate": 1.5401352052975798e-06, + "loss": 0.1987, + "step": 24646, + "teacher_loss": 0.18096241354942322 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.08000800013542175, + "learning_rate": 1.5391328940095674e-06, + "loss": 0.1294, + "step": 24647, + "teacher_loss": 0.13485166430473328 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.683329701423645, + "learning_rate": 1.5381308913388764e-06, + "loss": 0.2484, + "step": 24648, + "teacher_loss": 0.20010778307914734 + }, + { + "compression_loss": 0.0, + "epoch": 4.45, + "label_loss": 0.7667165994644165, + "learning_rate": 1.5371291973084873e-06, + "loss": 0.2812, + "step": 24649, + "teacher_loss": 0.22724270820617676 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.5416953563690186, + "learning_rate": 1.536127811941363e-06, + "loss": 0.284, + "step": 24650, + "teacher_loss": 0.25540316104888916 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.19146335124969482, + "learning_rate": 1.535126735260457e-06, + "loss": 0.2142, + "step": 24651, + "teacher_loss": 0.21671931445598602 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.12340216338634491, + "learning_rate": 1.534125967288726e-06, + "loss": 0.1254, + "step": 24652, + "teacher_loss": 0.12559425830841064 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.4364277720451355, + "learning_rate": 1.5331255080491162e-06, + "loss": 0.21, + "step": 24653, + "teacher_loss": 0.18480491638183594 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.4281620979309082, + "learning_rate": 1.5321253575645615e-06, + "loss": 0.164, + "step": 24654, + "teacher_loss": 0.13470236957073212 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.5908030271530151, + "learning_rate": 1.5311255158579918e-06, + "loss": 0.4113, + "step": 24655, + "teacher_loss": 0.3914051353931427 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.3711923360824585, + "learning_rate": 1.530125982952335e-06, + "loss": 0.204, + "step": 24656, + "teacher_loss": 0.18546688556671143 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.6110674142837524, + "learning_rate": 1.5291267588705065e-06, + "loss": 0.3238, + "step": 24657, + "teacher_loss": 0.29193222522735596 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.3262002468109131, + "learning_rate": 1.5281278436354063e-06, + "loss": 0.2399, + "step": 24658, + "teacher_loss": 0.2302880436182022 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.2759372293949127, + "learning_rate": 1.5271292372699507e-06, + "loss": 0.172, + "step": 24659, + "teacher_loss": 0.1604917347431183 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.3211386501789093, + "learning_rate": 1.5261309397970269e-06, + "loss": 0.2032, + "step": 24660, + "teacher_loss": 0.1901111602783203 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.4968227744102478, + "learning_rate": 1.5251329512395212e-06, + "loss": 0.2178, + "step": 24661, + "teacher_loss": 0.18679603934288025 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.3995521366596222, + "learning_rate": 1.5241352716203173e-06, + "loss": 0.1969, + "step": 24662, + "teacher_loss": 0.1743713617324829 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.1897321343421936, + "learning_rate": 1.5231379009622914e-06, + "loss": 0.1452, + "step": 24663, + "teacher_loss": 0.1402081549167633 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.585931658744812, + "learning_rate": 1.5221408392883057e-06, + "loss": 0.2181, + "step": 24664, + "teacher_loss": 0.17719818651676178 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.7102159857749939, + "learning_rate": 1.5211440866212218e-06, + "loss": 0.2759, + "step": 24665, + "teacher_loss": 0.22761595249176025 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.8968967795372009, + "learning_rate": 1.5201476429838945e-06, + "loss": 0.268, + "step": 24666, + "teacher_loss": 0.1980932503938675 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.8282178640365601, + "learning_rate": 1.5191515083991641e-06, + "loss": 0.4576, + "step": 24667, + "teacher_loss": 0.4164133071899414 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.5654422044754028, + "learning_rate": 1.5181556828898724e-06, + "loss": 0.2073, + "step": 24668, + "teacher_loss": 0.16754689812660217 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.6311633586883545, + "learning_rate": 1.5171601664788525e-06, + "loss": 0.2931, + "step": 24669, + "teacher_loss": 0.255515456199646 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.8347383737564087, + "learning_rate": 1.5161649591889248e-06, + "loss": 0.3213, + "step": 24670, + "teacher_loss": 0.26424330472946167 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.30423033237457275, + "learning_rate": 1.5151700610429076e-06, + "loss": 0.384, + "step": 24671, + "teacher_loss": 0.39285749197006226 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.25690388679504395, + "learning_rate": 1.5141754720636126e-06, + "loss": 0.2137, + "step": 24672, + "teacher_loss": 0.2089177817106247 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.9678155779838562, + "learning_rate": 1.5131811922738398e-06, + "loss": 0.2912, + "step": 24673, + "teacher_loss": 0.2160579413175583 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.9530693292617798, + "learning_rate": 1.5121872216963894e-06, + "loss": 0.3314, + "step": 24674, + "teacher_loss": 0.2623268961906433 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.4829097390174866, + "learning_rate": 1.511193560354045e-06, + "loss": 0.2306, + "step": 24675, + "teacher_loss": 0.2025410234928131 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.3551364541053772, + "learning_rate": 1.510200208269593e-06, + "loss": 0.2342, + "step": 24676, + "teacher_loss": 0.22076798975467682 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.41642531752586365, + "learning_rate": 1.5092071654658024e-06, + "loss": 0.1354, + "step": 24677, + "teacher_loss": 0.1041313111782074 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.7869532108306885, + "learning_rate": 1.5082144319654445e-06, + "loss": 0.2753, + "step": 24678, + "teacher_loss": 0.2184121012687683 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.5624879598617554, + "learning_rate": 1.507222007791283e-06, + "loss": 0.2677, + "step": 24679, + "teacher_loss": 0.2349700629711151 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.2554394602775574, + "learning_rate": 1.5062298929660628e-06, + "loss": 0.1441, + "step": 24680, + "teacher_loss": 0.13173530995845795 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.3748375475406647, + "learning_rate": 1.505238087512536e-06, + "loss": 0.1815, + "step": 24681, + "teacher_loss": 0.16005949676036835 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.4648219048976898, + "learning_rate": 1.5042465914534426e-06, + "loss": 0.1887, + "step": 24682, + "teacher_loss": 0.158060222864151 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.7790089249610901, + "learning_rate": 1.503255404811511e-06, + "loss": 0.2987, + "step": 24683, + "teacher_loss": 0.24531182646751404 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.2799026072025299, + "learning_rate": 1.5022645276094681e-06, + "loss": 0.1935, + "step": 24684, + "teacher_loss": 0.18392933905124664 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.3267536759376526, + "learning_rate": 1.501273959870031e-06, + "loss": 0.2577, + "step": 24685, + "teacher_loss": 0.2500481605529785 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.2251317799091339, + "learning_rate": 1.5002837016159093e-06, + "loss": 0.164, + "step": 24686, + "teacher_loss": 0.15719449520111084 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.6285199522972107, + "learning_rate": 1.4992937528698103e-06, + "loss": 0.2394, + "step": 24687, + "teacher_loss": 0.19614554941654205 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.8788745999336243, + "learning_rate": 1.4983041136544273e-06, + "loss": 0.2853, + "step": 24688, + "teacher_loss": 0.2193090170621872 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.23762989044189453, + "learning_rate": 1.497314783992449e-06, + "loss": 0.1476, + "step": 24689, + "teacher_loss": 0.13756787776947021 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.5801180005073547, + "learning_rate": 1.4963257639065636e-06, + "loss": 0.2635, + "step": 24690, + "teacher_loss": 0.22836823761463165 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.676182210445404, + "learning_rate": 1.4953370534194382e-06, + "loss": 0.2348, + "step": 24691, + "teacher_loss": 0.18579134345054626 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.42612701654434204, + "learning_rate": 1.4943486525537464e-06, + "loss": 0.1783, + "step": 24692, + "teacher_loss": 0.15074390172958374 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.8064684867858887, + "learning_rate": 1.49336056133215e-06, + "loss": 0.2335, + "step": 24693, + "teacher_loss": 0.1698903888463974 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.4468753933906555, + "learning_rate": 1.492372779777299e-06, + "loss": 0.1866, + "step": 24694, + "teacher_loss": 0.15769805014133453 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.5548670291900635, + "learning_rate": 1.4913853079118422e-06, + "loss": 0.2115, + "step": 24695, + "teacher_loss": 0.17333796620368958 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.3382173180580139, + "learning_rate": 1.4903981457584215e-06, + "loss": 0.2125, + "step": 24696, + "teacher_loss": 0.1985061764717102 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.2376953363418579, + "learning_rate": 1.489411293339667e-06, + "loss": 0.2183, + "step": 24697, + "teacher_loss": 0.21618568897247314 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.38035848736763, + "learning_rate": 1.4884247506782023e-06, + "loss": 0.2258, + "step": 24698, + "teacher_loss": 0.2085937261581421 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.9342123866081238, + "learning_rate": 1.4874385177966493e-06, + "loss": 0.2226, + "step": 24699, + "teacher_loss": 0.14357687532901764 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.6674273014068604, + "learning_rate": 1.48645259471762e-06, + "loss": 0.2196, + "step": 24700, + "teacher_loss": 0.16979068517684937 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.3347325325012207, + "learning_rate": 1.4854669814637145e-06, + "loss": 0.2579, + "step": 24701, + "teacher_loss": 0.24932903051376343 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.5112294554710388, + "learning_rate": 1.4844816780575315e-06, + "loss": 0.1881, + "step": 24702, + "teacher_loss": 0.15215738117694855 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.3892780542373657, + "learning_rate": 1.4834966845216663e-06, + "loss": 0.2246, + "step": 24703, + "teacher_loss": 0.20635256171226501 + }, + { + "compression_loss": 0.0, + "epoch": 4.46, + "label_loss": 0.5779500007629395, + "learning_rate": 1.482512000878694e-06, + "loss": 0.2208, + "step": 24704, + "teacher_loss": 0.18107977509498596 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.507804274559021, + "learning_rate": 1.4815276271511936e-06, + "loss": 0.1904, + "step": 24705, + "teacher_loss": 0.15515880286693573 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.3817175328731537, + "learning_rate": 1.4805435633617381e-06, + "loss": 0.1584, + "step": 24706, + "teacher_loss": 0.13354583084583282 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.29282355308532715, + "learning_rate": 1.479559809532885e-06, + "loss": 0.1659, + "step": 24707, + "teacher_loss": 0.15183493494987488 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.4344145655632019, + "learning_rate": 1.4785763656871826e-06, + "loss": 0.2414, + "step": 24708, + "teacher_loss": 0.21998082101345062 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.8593874573707581, + "learning_rate": 1.4775932318471913e-06, + "loss": 0.2505, + "step": 24709, + "teacher_loss": 0.1828332394361496 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.1697319597005844, + "learning_rate": 1.4766104080354448e-06, + "loss": 0.18, + "step": 24710, + "teacher_loss": 0.1811947226524353 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.19014552235603333, + "learning_rate": 1.4756278942744733e-06, + "loss": 0.1453, + "step": 24711, + "teacher_loss": 0.1402636468410492 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.42603686451911926, + "learning_rate": 1.4746456905868055e-06, + "loss": 0.27, + "step": 24712, + "teacher_loss": 0.2526553273200989 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.3187551498413086, + "learning_rate": 1.4736637969949634e-06, + "loss": 0.1662, + "step": 24713, + "teacher_loss": 0.14927524328231812 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.45264625549316406, + "learning_rate": 1.4726822135214558e-06, + "loss": 0.2053, + "step": 24714, + "teacher_loss": 0.17778238654136658 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.4446519613265991, + "learning_rate": 1.471700940188786e-06, + "loss": 0.2067, + "step": 24715, + "teacher_loss": 0.18024948239326477 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.3290180563926697, + "learning_rate": 1.470719977019458e-06, + "loss": 0.1718, + "step": 24716, + "teacher_loss": 0.15432271361351013 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.434977650642395, + "learning_rate": 1.4697393240359536e-06, + "loss": 0.2098, + "step": 24717, + "teacher_loss": 0.18477891385555267 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.3501642048358917, + "learning_rate": 1.4687589812607616e-06, + "loss": 0.1579, + "step": 24718, + "teacher_loss": 0.13657647371292114 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.5264714956283569, + "learning_rate": 1.4677789487163606e-06, + "loss": 0.1831, + "step": 24719, + "teacher_loss": 0.1449185311794281 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.24844853579998016, + "learning_rate": 1.466799226425216e-06, + "loss": 0.1484, + "step": 24720, + "teacher_loss": 0.13733674585819244 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.9030625224113464, + "learning_rate": 1.4658198144097851e-06, + "loss": 0.2783, + "step": 24721, + "teacher_loss": 0.20885899662971497 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.648826003074646, + "learning_rate": 1.4648407126925329e-06, + "loss": 0.2168, + "step": 24722, + "teacher_loss": 0.16884389519691467 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 1.0891716480255127, + "learning_rate": 1.4638619212959047e-06, + "loss": 0.311, + "step": 24723, + "teacher_loss": 0.2245115041732788 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.39611464738845825, + "learning_rate": 1.4628834402423347e-06, + "loss": 0.19, + "step": 24724, + "teacher_loss": 0.1671123057603836 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.25927823781967163, + "learning_rate": 1.4619052695542612e-06, + "loss": 0.1868, + "step": 24725, + "teacher_loss": 0.17873573303222656 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.2830972671508789, + "learning_rate": 1.4609274092541148e-06, + "loss": 0.1664, + "step": 24726, + "teacher_loss": 0.15346573293209076 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.17762327194213867, + "learning_rate": 1.4599498593643056e-06, + "loss": 0.1601, + "step": 24727, + "teacher_loss": 0.15810611844062805 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.1138099730014801, + "learning_rate": 1.4589726199072528e-06, + "loss": 0.1993, + "step": 24728, + "teacher_loss": 0.20885249972343445 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.17823049426078796, + "learning_rate": 1.4579956909053616e-06, + "loss": 0.1974, + "step": 24729, + "teacher_loss": 0.1995641142129898 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.29373645782470703, + "learning_rate": 1.4570190723810256e-06, + "loss": 0.1385, + "step": 24730, + "teacher_loss": 0.12124369293451309 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.2298370599746704, + "learning_rate": 1.4560427643566388e-06, + "loss": 0.2069, + "step": 24731, + "teacher_loss": 0.20432695746421814 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.26759904623031616, + "learning_rate": 1.4550667668545864e-06, + "loss": 0.1385, + "step": 24732, + "teacher_loss": 0.12414171546697617 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.28581398725509644, + "learning_rate": 1.4540910798972407e-06, + "loss": 0.2237, + "step": 24733, + "teacher_loss": 0.21684041619300842 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.5487774610519409, + "learning_rate": 1.453115703506977e-06, + "loss": 0.2201, + "step": 24734, + "teacher_loss": 0.18352553248405457 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.9727622270584106, + "learning_rate": 1.4521406377061525e-06, + "loss": 0.2929, + "step": 24735, + "teacher_loss": 0.21734902262687683 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.4192865490913391, + "learning_rate": 1.4511658825171226e-06, + "loss": 0.2818, + "step": 24736, + "teacher_loss": 0.2665690779685974 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.5896538496017456, + "learning_rate": 1.4501914379622411e-06, + "loss": 0.2047, + "step": 24737, + "teacher_loss": 0.16190290451049805 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.25755244493484497, + "learning_rate": 1.4492173040638435e-06, + "loss": 0.2007, + "step": 24738, + "teacher_loss": 0.1944286972284317 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.4545484781265259, + "learning_rate": 1.4482434808442686e-06, + "loss": 0.2612, + "step": 24739, + "teacher_loss": 0.2397494912147522 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.42317995429039, + "learning_rate": 1.4472699683258366e-06, + "loss": 0.2486, + "step": 24740, + "teacher_loss": 0.22916650772094727 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 1.0320826768875122, + "learning_rate": 1.446296766530872e-06, + "loss": 0.4354, + "step": 24741, + "teacher_loss": 0.36911171674728394 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.26616430282592773, + "learning_rate": 1.4453238754816878e-06, + "loss": 0.176, + "step": 24742, + "teacher_loss": 0.1660006046295166 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.4261089563369751, + "learning_rate": 1.4443512952005866e-06, + "loss": 0.2478, + "step": 24743, + "teacher_loss": 0.22793373465538025 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.2057255357503891, + "learning_rate": 1.4433790257098672e-06, + "loss": 0.1886, + "step": 24744, + "teacher_loss": 0.18664193153381348 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.7159783244132996, + "learning_rate": 1.4424070670318252e-06, + "loss": 0.2734, + "step": 24745, + "teacher_loss": 0.22421352565288544 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.6081286668777466, + "learning_rate": 1.4414354191887392e-06, + "loss": 0.2706, + "step": 24746, + "teacher_loss": 0.2331523895263672 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.40091952681541443, + "learning_rate": 1.4404640822028914e-06, + "loss": 0.1951, + "step": 24747, + "teacher_loss": 0.17221969366073608 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.13203710317611694, + "learning_rate": 1.4394930560965441e-06, + "loss": 0.1971, + "step": 24748, + "teacher_loss": 0.20431926846504211 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.22094330191612244, + "learning_rate": 1.4385223408919662e-06, + "loss": 0.1982, + "step": 24749, + "teacher_loss": 0.19565460085868835 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.37275946140289307, + "learning_rate": 1.437551936611413e-06, + "loss": 0.1911, + "step": 24750, + "teacher_loss": 0.17092156410217285 + }, + { + "epoch": 4.47, + "eval_exact_match": 80.47303689687796, + "eval_f1": 87.72585406675309, + "step": 24750 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.658419132232666, + "learning_rate": 1.4365818432771289e-06, + "loss": 0.2418, + "step": 24751, + "teacher_loss": 0.1954875886440277 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.8100974559783936, + "learning_rate": 1.4356120609113587e-06, + "loss": 0.2808, + "step": 24752, + "teacher_loss": 0.22197428345680237 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.4429551064968109, + "learning_rate": 1.4346425895363385e-06, + "loss": 0.2405, + "step": 24753, + "teacher_loss": 0.21795004606246948 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.349784791469574, + "learning_rate": 1.4336734291742904e-06, + "loss": 0.2065, + "step": 24754, + "teacher_loss": 0.19053535163402557 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.315716028213501, + "learning_rate": 1.4327045798474347e-06, + "loss": 0.1715, + "step": 24755, + "teacher_loss": 0.15547645092010498 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 1.0179736614227295, + "learning_rate": 1.4317360415779907e-06, + "loss": 0.2393, + "step": 24756, + "teacher_loss": 0.15274512767791748 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.4865512251853943, + "learning_rate": 1.4307678143881586e-06, + "loss": 0.1607, + "step": 24757, + "teacher_loss": 0.12445828318595886 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.2341880053281784, + "learning_rate": 1.4297998983001327e-06, + "loss": 0.1765, + "step": 24758, + "teacher_loss": 0.17013861238956451 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.4739038348197937, + "learning_rate": 1.4288322933361147e-06, + "loss": 0.2063, + "step": 24759, + "teacher_loss": 0.17651841044425964 + }, + { + "compression_loss": 0.0, + "epoch": 4.47, + "label_loss": 0.4122597277164459, + "learning_rate": 1.4278649995182858e-06, + "loss": 0.2249, + "step": 24760, + "teacher_loss": 0.20410922169685364 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.8624149560928345, + "learning_rate": 1.4268980168688161e-06, + "loss": 0.2762, + "step": 24761, + "teacher_loss": 0.21102187037467957 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.4933362603187561, + "learning_rate": 1.42593134540988e-06, + "loss": 0.2163, + "step": 24762, + "teacher_loss": 0.18548986315727234 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.12792108952999115, + "learning_rate": 1.424964985163646e-06, + "loss": 0.1672, + "step": 24763, + "teacher_loss": 0.17160899937152863 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.38337233662605286, + "learning_rate": 1.4239989361522599e-06, + "loss": 0.2332, + "step": 24764, + "teacher_loss": 0.2164752334356308 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.17478561401367188, + "learning_rate": 1.4230331983978773e-06, + "loss": 0.1824, + "step": 24765, + "teacher_loss": 0.1832735240459442 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.4481823444366455, + "learning_rate": 1.4220677719226389e-06, + "loss": 0.2335, + "step": 24766, + "teacher_loss": 0.20965327322483063 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.4820939898490906, + "learning_rate": 1.4211026567486752e-06, + "loss": 0.1987, + "step": 24767, + "teacher_loss": 0.16721491515636444 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.2123582363128662, + "learning_rate": 1.420137852898117e-06, + "loss": 0.1179, + "step": 24768, + "teacher_loss": 0.10745567828416824 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.4929640293121338, + "learning_rate": 1.4191733603930845e-06, + "loss": 0.1772, + "step": 24769, + "teacher_loss": 0.1421264111995697 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.23019075393676758, + "learning_rate": 1.4182091792556906e-06, + "loss": 0.1725, + "step": 24770, + "teacher_loss": 0.1660854071378708 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.36552345752716064, + "learning_rate": 1.417245309508034e-06, + "loss": 0.1818, + "step": 24771, + "teacher_loss": 0.16137701272964478 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.49269217252731323, + "learning_rate": 1.4162817511722237e-06, + "loss": 0.1861, + "step": 24772, + "teacher_loss": 0.15204837918281555 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.5924199819564819, + "learning_rate": 1.4153185042703488e-06, + "loss": 0.1951, + "step": 24773, + "teacher_loss": 0.1509571373462677 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.49942952394485474, + "learning_rate": 1.4143555688244863e-06, + "loss": 0.2845, + "step": 24774, + "teacher_loss": 0.2606343626976013 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.3113252520561218, + "learning_rate": 1.4133929448567206e-06, + "loss": 0.1971, + "step": 24775, + "teacher_loss": 0.18445220589637756 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.3413528800010681, + "learning_rate": 1.4124306323891222e-06, + "loss": 0.1711, + "step": 24776, + "teacher_loss": 0.15218310058116913 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.4467523694038391, + "learning_rate": 1.4114686314437487e-06, + "loss": 0.2471, + "step": 24777, + "teacher_loss": 0.22488830983638763 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.4966054856777191, + "learning_rate": 1.4105069420426603e-06, + "loss": 0.235, + "step": 24778, + "teacher_loss": 0.20590201020240784 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.2515527606010437, + "learning_rate": 1.4095455642079048e-06, + "loss": 0.1677, + "step": 24779, + "teacher_loss": 0.15837281942367554 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.6548233032226562, + "learning_rate": 1.408584497961526e-06, + "loss": 0.2813, + "step": 24780, + "teacher_loss": 0.2397511899471283 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.31414830684661865, + "learning_rate": 1.407623743325548e-06, + "loss": 0.1766, + "step": 24781, + "teacher_loss": 0.16128921508789062 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.1872939169406891, + "learning_rate": 1.4066633003220113e-06, + "loss": 0.175, + "step": 24782, + "teacher_loss": 0.17358499765396118 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.5155812501907349, + "learning_rate": 1.4057031689729304e-06, + "loss": 0.2798, + "step": 24783, + "teacher_loss": 0.2536531686782837 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.5210274457931519, + "learning_rate": 1.4047433493003141e-06, + "loss": 0.2562, + "step": 24784, + "teacher_loss": 0.22682130336761475 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.49933522939682007, + "learning_rate": 1.4037838413261733e-06, + "loss": 0.2407, + "step": 24785, + "teacher_loss": 0.21200178563594818 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.46735522150993347, + "learning_rate": 1.4028246450725084e-06, + "loss": 0.1796, + "step": 24786, + "teacher_loss": 0.14761066436767578 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.14099842309951782, + "learning_rate": 1.4018657605613056e-06, + "loss": 0.1028, + "step": 24787, + "teacher_loss": 0.09852568805217743 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.6091364622116089, + "learning_rate": 1.4009071878145502e-06, + "loss": 0.2166, + "step": 24788, + "teacher_loss": 0.17299991846084595 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.5375706553459167, + "learning_rate": 1.3999489268542248e-06, + "loss": 0.2859, + "step": 24789, + "teacher_loss": 0.2578897774219513 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.3684234619140625, + "learning_rate": 1.3989909777022918e-06, + "loss": 0.2074, + "step": 24790, + "teacher_loss": 0.1895555853843689 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.5994600057601929, + "learning_rate": 1.3980333403807189e-06, + "loss": 0.3783, + "step": 24791, + "teacher_loss": 0.3536805510520935 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.577006459236145, + "learning_rate": 1.3970760149114614e-06, + "loss": 0.2635, + "step": 24792, + "teacher_loss": 0.22869713604450226 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.42220306396484375, + "learning_rate": 1.3961190013164653e-06, + "loss": 0.1982, + "step": 24793, + "teacher_loss": 0.1732625663280487 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.2720046043395996, + "learning_rate": 1.395162299617675e-06, + "loss": 0.2046, + "step": 24794, + "teacher_loss": 0.19710539281368256 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.18090149760246277, + "learning_rate": 1.3942059098370258e-06, + "loss": 0.1509, + "step": 24795, + "teacher_loss": 0.1475912183523178 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.7780902981758118, + "learning_rate": 1.3932498319964403e-06, + "loss": 0.2417, + "step": 24796, + "teacher_loss": 0.18214696645736694 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.39168781042099, + "learning_rate": 1.3922940661178429e-06, + "loss": 0.2042, + "step": 24797, + "teacher_loss": 0.18339765071868896 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.21832218766212463, + "learning_rate": 1.3913386122231426e-06, + "loss": 0.2034, + "step": 24798, + "teacher_loss": 0.2017078399658203 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.4339350461959839, + "learning_rate": 1.3903834703342466e-06, + "loss": 0.1658, + "step": 24799, + "teacher_loss": 0.13602042198181152 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.5206286311149597, + "learning_rate": 1.3894286404730576e-06, + "loss": 0.2713, + "step": 24800, + "teacher_loss": 0.24358314275741577 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.4038732647895813, + "learning_rate": 1.38847412266146e-06, + "loss": 0.2993, + "step": 24801, + "teacher_loss": 0.28771454095840454 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.6999591588973999, + "learning_rate": 1.3875199169213443e-06, + "loss": 0.2736, + "step": 24802, + "teacher_loss": 0.22623848915100098 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.49118661880493164, + "learning_rate": 1.3865660232745813e-06, + "loss": 0.1868, + "step": 24803, + "teacher_loss": 0.1530255675315857 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.45903363823890686, + "learning_rate": 1.3856124417430438e-06, + "loss": 0.2109, + "step": 24804, + "teacher_loss": 0.18338388204574585 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.22812677919864655, + "learning_rate": 1.3846591723485974e-06, + "loss": 0.1256, + "step": 24805, + "teacher_loss": 0.11420183628797531 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.5153632164001465, + "learning_rate": 1.383706215113093e-06, + "loss": 0.2431, + "step": 24806, + "teacher_loss": 0.21281574666500092 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.5113099813461304, + "learning_rate": 1.382753570058385e-06, + "loss": 0.2509, + "step": 24807, + "teacher_loss": 0.22199270129203796 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.4905911684036255, + "learning_rate": 1.3818012372063071e-06, + "loss": 0.2455, + "step": 24808, + "teacher_loss": 0.21822744607925415 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.2916296720504761, + "learning_rate": 1.380849216578699e-06, + "loss": 0.2346, + "step": 24809, + "teacher_loss": 0.2282804250717163 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.18334712088108063, + "learning_rate": 1.3798975081973864e-06, + "loss": 0.2082, + "step": 24810, + "teacher_loss": 0.21090614795684814 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.3228442966938019, + "learning_rate": 1.3789461120841883e-06, + "loss": 0.1673, + "step": 24811, + "teacher_loss": 0.15005150437355042 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.1938084065914154, + "learning_rate": 1.3779950282609162e-06, + "loss": 0.1712, + "step": 24812, + "teacher_loss": 0.16863678395748138 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.26241979002952576, + "learning_rate": 1.3770442567493801e-06, + "loss": 0.1847, + "step": 24813, + "teacher_loss": 0.17604553699493408 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.3147497773170471, + "learning_rate": 1.3760937975713734e-06, + "loss": 0.1489, + "step": 24814, + "teacher_loss": 0.13042427599430084 + }, + { + "compression_loss": 0.0, + "epoch": 4.48, + "label_loss": 0.6530540585517883, + "learning_rate": 1.3751436507486898e-06, + "loss": 0.2279, + "step": 24815, + "teacher_loss": 0.18060529232025146 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.4609566330909729, + "learning_rate": 1.3741938163031153e-06, + "loss": 0.2471, + "step": 24816, + "teacher_loss": 0.22337350249290466 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 1.0050783157348633, + "learning_rate": 1.373244294256421e-06, + "loss": 0.2616, + "step": 24817, + "teacher_loss": 0.17904475331306458 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.5441428422927856, + "learning_rate": 1.3722950846303794e-06, + "loss": 0.2544, + "step": 24818, + "teacher_loss": 0.2221723198890686 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.45430320501327515, + "learning_rate": 1.3713461874467564e-06, + "loss": 0.2498, + "step": 24819, + "teacher_loss": 0.227039635181427 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.3739086389541626, + "learning_rate": 1.3703976027273063e-06, + "loss": 0.1684, + "step": 24820, + "teacher_loss": 0.14556419849395752 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.15170660614967346, + "learning_rate": 1.3694493304937683e-06, + "loss": 0.1916, + "step": 24821, + "teacher_loss": 0.19606655836105347 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.5213661193847656, + "learning_rate": 1.3685013707678968e-06, + "loss": 0.2356, + "step": 24822, + "teacher_loss": 0.2038998156785965 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.6473950147628784, + "learning_rate": 1.3675537235714192e-06, + "loss": 0.239, + "step": 24823, + "teacher_loss": 0.19361340999603271 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.5452805161476135, + "learning_rate": 1.36660638892606e-06, + "loss": 0.2018, + "step": 24824, + "teacher_loss": 0.16364173591136932 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.3174707889556885, + "learning_rate": 1.36565936685354e-06, + "loss": 0.1905, + "step": 24825, + "teacher_loss": 0.17644575238227844 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.33065760135650635, + "learning_rate": 1.3647126573755768e-06, + "loss": 0.1669, + "step": 24826, + "teacher_loss": 0.14869606494903564 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.48030608892440796, + "learning_rate": 1.3637662605138684e-06, + "loss": 0.2938, + "step": 24827, + "teacher_loss": 0.27303820848464966 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.3332180082798004, + "learning_rate": 1.362820176290117e-06, + "loss": 0.2622, + "step": 24828, + "teacher_loss": 0.2542579174041748 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.5059642791748047, + "learning_rate": 1.3618744047260156e-06, + "loss": 0.2169, + "step": 24829, + "teacher_loss": 0.1848001778125763 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.2178192138671875, + "learning_rate": 1.3609289458432434e-06, + "loss": 0.2063, + "step": 24830, + "teacher_loss": 0.20503322780132294 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.4879953861236572, + "learning_rate": 1.3599837996634744e-06, + "loss": 0.203, + "step": 24831, + "teacher_loss": 0.17132779955863953 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.3109952211380005, + "learning_rate": 1.3590389662083868e-06, + "loss": 0.2373, + "step": 24832, + "teacher_loss": 0.22914129495620728 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.3827393651008606, + "learning_rate": 1.3580944454996364e-06, + "loss": 0.2531, + "step": 24833, + "teacher_loss": 0.23864270746707916 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.4307803511619568, + "learning_rate": 1.357150237558879e-06, + "loss": 0.2066, + "step": 24834, + "teacher_loss": 0.18172679841518402 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.49109745025634766, + "learning_rate": 1.3562063424077625e-06, + "loss": 0.1991, + "step": 24835, + "teacher_loss": 0.16665557026863098 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.4011456072330475, + "learning_rate": 1.3552627600679312e-06, + "loss": 0.2229, + "step": 24836, + "teacher_loss": 0.20311488211154938 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.3925430178642273, + "learning_rate": 1.3543194905610146e-06, + "loss": 0.2708, + "step": 24837, + "teacher_loss": 0.2573012113571167 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.698509693145752, + "learning_rate": 1.3533765339086384e-06, + "loss": 0.2165, + "step": 24838, + "teacher_loss": 0.16299782693386078 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.5401134490966797, + "learning_rate": 1.3524338901324273e-06, + "loss": 0.2181, + "step": 24839, + "teacher_loss": 0.1822671890258789 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 1.1441680192947388, + "learning_rate": 1.3514915592539857e-06, + "loss": 0.3092, + "step": 24840, + "teacher_loss": 0.21641850471496582 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.6434491872787476, + "learning_rate": 1.3505495412949225e-06, + "loss": 0.2842, + "step": 24841, + "teacher_loss": 0.24430745840072632 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.15564462542533875, + "learning_rate": 1.3496078362768394e-06, + "loss": 0.1826, + "step": 24842, + "teacher_loss": 0.18561133742332458 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.3927781879901886, + "learning_rate": 1.3486664442213203e-06, + "loss": 0.1327, + "step": 24843, + "teacher_loss": 0.10376574099063873 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.5456156730651855, + "learning_rate": 1.3477253651499467e-06, + "loss": 0.2467, + "step": 24844, + "teacher_loss": 0.21350297331809998 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.24306191504001617, + "learning_rate": 1.3467845990843042e-06, + "loss": 0.1786, + "step": 24845, + "teacher_loss": 0.17145463824272156 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.5092607736587524, + "learning_rate": 1.345844146045956e-06, + "loss": 0.2402, + "step": 24846, + "teacher_loss": 0.21031442284584045 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.47005465626716614, + "learning_rate": 1.3449040060564627e-06, + "loss": 0.2265, + "step": 24847, + "teacher_loss": 0.19944171607494354 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.875042200088501, + "learning_rate": 1.3439641791373792e-06, + "loss": 0.2611, + "step": 24848, + "teacher_loss": 0.192830890417099 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.45506036281585693, + "learning_rate": 1.343024665310258e-06, + "loss": 0.1899, + "step": 24849, + "teacher_loss": 0.16043657064437866 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.21290555596351624, + "learning_rate": 1.3420854645966318e-06, + "loss": 0.1965, + "step": 24850, + "teacher_loss": 0.1946442425251007 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.2770000100135803, + "learning_rate": 1.341146577018037e-06, + "loss": 0.1902, + "step": 24851, + "teacher_loss": 0.18053388595581055 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.447892427444458, + "learning_rate": 1.3402080025960028e-06, + "loss": 0.2024, + "step": 24852, + "teacher_loss": 0.17512789368629456 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.6519142389297485, + "learning_rate": 1.3392697413520422e-06, + "loss": 0.2176, + "step": 24853, + "teacher_loss": 0.16930551826953888 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.6063581705093384, + "learning_rate": 1.3383317933076712e-06, + "loss": 0.2065, + "step": 24854, + "teacher_loss": 0.16211152076721191 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.29877063632011414, + "learning_rate": 1.3373941584843924e-06, + "loss": 0.1936, + "step": 24855, + "teacher_loss": 0.1819210648536682 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.37757647037506104, + "learning_rate": 1.3364568369037022e-06, + "loss": 0.2557, + "step": 24856, + "teacher_loss": 0.24213998019695282 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.5091791152954102, + "learning_rate": 1.3355198285870935e-06, + "loss": 0.1999, + "step": 24857, + "teacher_loss": 0.16555072367191315 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.3189675211906433, + "learning_rate": 1.3345831335560437e-06, + "loss": 0.2367, + "step": 24858, + "teacher_loss": 0.22760102152824402 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.41672682762145996, + "learning_rate": 1.333646751832031e-06, + "loss": 0.2566, + "step": 24859, + "teacher_loss": 0.23877234756946564 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.3014252781867981, + "learning_rate": 1.3327106834365282e-06, + "loss": 0.3953, + "step": 24860, + "teacher_loss": 0.4056766629219055 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.5225478410720825, + "learning_rate": 1.3317749283909898e-06, + "loss": 0.2122, + "step": 24861, + "teacher_loss": 0.17771360278129578 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.3503858745098114, + "learning_rate": 1.3308394867168733e-06, + "loss": 0.1649, + "step": 24862, + "teacher_loss": 0.14430014789104462 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.398532509803772, + "learning_rate": 1.3299043584356268e-06, + "loss": 0.2391, + "step": 24863, + "teacher_loss": 0.22142288088798523 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.6095664501190186, + "learning_rate": 1.3289695435686865e-06, + "loss": 0.1849, + "step": 24864, + "teacher_loss": 0.13770407438278198 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.7627614140510559, + "learning_rate": 1.3280350421374888e-06, + "loss": 0.2199, + "step": 24865, + "teacher_loss": 0.15954527258872986 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.20806953310966492, + "learning_rate": 1.3271008541634544e-06, + "loss": 0.1833, + "step": 24866, + "teacher_loss": 0.1805018186569214 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.3728608787059784, + "learning_rate": 1.3261669796680048e-06, + "loss": 0.2144, + "step": 24867, + "teacher_loss": 0.1968323439359665 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.6760705709457397, + "learning_rate": 1.3252334186725513e-06, + "loss": 0.231, + "step": 24868, + "teacher_loss": 0.18158340454101562 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.5103421807289124, + "learning_rate": 1.3243001711984948e-06, + "loss": 0.2324, + "step": 24869, + "teacher_loss": 0.20153653621673584 + }, + { + "compression_loss": 0.0, + "epoch": 4.49, + "label_loss": 0.22068721055984497, + "learning_rate": 1.3233672372672367e-06, + "loss": 0.1804, + "step": 24870, + "teacher_loss": 0.1759127378463745 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.34370845556259155, + "learning_rate": 1.3224346169001583e-06, + "loss": 0.2411, + "step": 24871, + "teacher_loss": 0.2297222912311554 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.4138121008872986, + "learning_rate": 1.321502310118649e-06, + "loss": 0.1752, + "step": 24872, + "teacher_loss": 0.14870071411132812 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.5852549076080322, + "learning_rate": 1.3205703169440837e-06, + "loss": 0.2719, + "step": 24873, + "teacher_loss": 0.23711101710796356 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.3902353346347809, + "learning_rate": 1.3196386373978231e-06, + "loss": 0.1753, + "step": 24874, + "teacher_loss": 0.15142151713371277 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.4449976086616516, + "learning_rate": 1.3187072715012355e-06, + "loss": 0.2439, + "step": 24875, + "teacher_loss": 0.22152970731258392 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.42878836393356323, + "learning_rate": 1.317776219275672e-06, + "loss": 0.2109, + "step": 24876, + "teacher_loss": 0.18670785427093506 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.4768916964530945, + "learning_rate": 1.3168454807424774e-06, + "loss": 0.1832, + "step": 24877, + "teacher_loss": 0.15056294202804565 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.35752326250076294, + "learning_rate": 1.3159150559229909e-06, + "loss": 0.2025, + "step": 24878, + "teacher_loss": 0.18526506423950195 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.2957937717437744, + "learning_rate": 1.3149849448385475e-06, + "loss": 0.158, + "step": 24879, + "teacher_loss": 0.1427101343870163 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.9467372894287109, + "learning_rate": 1.31405514751047e-06, + "loss": 0.3351, + "step": 24880, + "teacher_loss": 0.26711851358413696 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.18497636914253235, + "learning_rate": 1.3131256639600697e-06, + "loss": 0.1632, + "step": 24881, + "teacher_loss": 0.1607877016067505 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.17727389931678772, + "learning_rate": 1.3121964942086694e-06, + "loss": 0.1584, + "step": 24882, + "teacher_loss": 0.15632781386375427 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.22995662689208984, + "learning_rate": 1.311267638277564e-06, + "loss": 0.1425, + "step": 24883, + "teacher_loss": 0.13276252150535583 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.30090975761413574, + "learning_rate": 1.3103390961880446e-06, + "loss": 0.1888, + "step": 24884, + "teacher_loss": 0.1763882040977478 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.5656433701515198, + "learning_rate": 1.3094108679614125e-06, + "loss": 0.2518, + "step": 24885, + "teacher_loss": 0.2169492244720459 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.2606929838657379, + "learning_rate": 1.3084829536189424e-06, + "loss": 0.1892, + "step": 24886, + "teacher_loss": 0.18120211362838745 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.46706366539001465, + "learning_rate": 1.3075553531819057e-06, + "loss": 0.2047, + "step": 24887, + "teacher_loss": 0.17554797232151031 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.23889076709747314, + "learning_rate": 1.3066280666715735e-06, + "loss": 0.1705, + "step": 24888, + "teacher_loss": 0.16284796595573425 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.44100844860076904, + "learning_rate": 1.3057010941092073e-06, + "loss": 0.2585, + "step": 24889, + "teacher_loss": 0.23820900917053223 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 1.2169859409332275, + "learning_rate": 1.3047744355160551e-06, + "loss": 0.3799, + "step": 24890, + "teacher_loss": 0.28690385818481445 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.44141945242881775, + "learning_rate": 1.3038480909133631e-06, + "loss": 0.2531, + "step": 24891, + "teacher_loss": 0.23221619427204132 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.4876900911331177, + "learning_rate": 1.3029220603223746e-06, + "loss": 0.2642, + "step": 24892, + "teacher_loss": 0.23939740657806396 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.6729294061660767, + "learning_rate": 1.301996343764319e-06, + "loss": 0.2655, + "step": 24893, + "teacher_loss": 0.22024670243263245 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.4354380667209625, + "learning_rate": 1.301070941260411e-06, + "loss": 0.2378, + "step": 24894, + "teacher_loss": 0.21582409739494324 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.5990495681762695, + "learning_rate": 1.3001458528318805e-06, + "loss": 0.2551, + "step": 24895, + "teacher_loss": 0.21685002744197845 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.3487049341201782, + "learning_rate": 1.299221078499932e-06, + "loss": 0.2, + "step": 24896, + "teacher_loss": 0.18346790969371796 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.3687381148338318, + "learning_rate": 1.2982966182857637e-06, + "loss": 0.2004, + "step": 24897, + "teacher_loss": 0.18166957795619965 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.36865270137786865, + "learning_rate": 1.297372472210575e-06, + "loss": 0.2961, + "step": 24898, + "teacher_loss": 0.2880134582519531 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.477919340133667, + "learning_rate": 1.296448640295556e-06, + "loss": 0.2065, + "step": 24899, + "teacher_loss": 0.17631492018699646 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.34093210101127625, + "learning_rate": 1.2955251225618797e-06, + "loss": 0.295, + "step": 24900, + "teacher_loss": 0.28993409872055054 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.1556183099746704, + "learning_rate": 1.2946019190307256e-06, + "loss": 0.1589, + "step": 24901, + "teacher_loss": 0.15929779410362244 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.5449413061141968, + "learning_rate": 1.2936790297232603e-06, + "loss": 0.2136, + "step": 24902, + "teacher_loss": 0.1768096536397934 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.32774674892425537, + "learning_rate": 1.2927564546606401e-06, + "loss": 0.1628, + "step": 24903, + "teacher_loss": 0.144521102309227 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.5019647479057312, + "learning_rate": 1.2918341938640165e-06, + "loss": 0.3192, + "step": 24904, + "teacher_loss": 0.29887545108795166 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.4542728066444397, + "learning_rate": 1.2909122473545393e-06, + "loss": 0.1526, + "step": 24905, + "teacher_loss": 0.11904914677143097 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.27091312408447266, + "learning_rate": 1.2899906151533414e-06, + "loss": 0.2097, + "step": 24906, + "teacher_loss": 0.20289915800094604 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.26909172534942627, + "learning_rate": 1.289069297281551e-06, + "loss": 0.1614, + "step": 24907, + "teacher_loss": 0.14943546056747437 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.1845596730709076, + "learning_rate": 1.2881482937602946e-06, + "loss": 0.1376, + "step": 24908, + "teacher_loss": 0.13232731819152832 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.3280317187309265, + "learning_rate": 1.2872276046106901e-06, + "loss": 0.1846, + "step": 24909, + "teacher_loss": 0.16865724325180054 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.4422951638698578, + "learning_rate": 1.2863072298538426e-06, + "loss": 0.1618, + "step": 24910, + "teacher_loss": 0.13066960871219635 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.2813696265220642, + "learning_rate": 1.2853871695108531e-06, + "loss": 0.162, + "step": 24911, + "teacher_loss": 0.14877942204475403 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.503632664680481, + "learning_rate": 1.2844674236028186e-06, + "loss": 0.2056, + "step": 24912, + "teacher_loss": 0.1724781095981598 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.44062715768814087, + "learning_rate": 1.2835479921508236e-06, + "loss": 0.2093, + "step": 24913, + "teacher_loss": 0.18355225026607513 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.314726859331131, + "learning_rate": 1.2826288751759495e-06, + "loss": 0.1768, + "step": 24914, + "teacher_loss": 0.16152897477149963 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.6788257360458374, + "learning_rate": 1.2817100726992698e-06, + "loss": 0.1978, + "step": 24915, + "teacher_loss": 0.14438213407993317 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.5926549434661865, + "learning_rate": 1.2807915847418472e-06, + "loss": 0.2405, + "step": 24916, + "teacher_loss": 0.20140224695205688 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.46895840764045715, + "learning_rate": 1.2798734113247419e-06, + "loss": 0.2339, + "step": 24917, + "teacher_loss": 0.20773360133171082 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.23749813437461853, + "learning_rate": 1.278955552469005e-06, + "loss": 0.2077, + "step": 24918, + "teacher_loss": 0.20435330271720886 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.6081233024597168, + "learning_rate": 1.2780380081956766e-06, + "loss": 0.2486, + "step": 24919, + "teacher_loss": 0.20870795845985413 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.68998783826828, + "learning_rate": 1.2771207785258e-06, + "loss": 0.2235, + "step": 24920, + "teacher_loss": 0.1716526299715042 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.4073273241519928, + "learning_rate": 1.276203863480398e-06, + "loss": 0.1585, + "step": 24921, + "teacher_loss": 0.13088031113147736 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.5293726325035095, + "learning_rate": 1.2752872630804956e-06, + "loss": 0.2109, + "step": 24922, + "teacher_loss": 0.1755514144897461 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.2349901795387268, + "learning_rate": 1.2743709773471078e-06, + "loss": 0.1809, + "step": 24923, + "teacher_loss": 0.17485250532627106 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.19986101984977722, + "learning_rate": 1.2734550063012411e-06, + "loss": 0.1508, + "step": 24924, + "teacher_loss": 0.14536993205547333 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.4453723430633545, + "learning_rate": 1.272539349963897e-06, + "loss": 0.1774, + "step": 24925, + "teacher_loss": 0.1476157009601593 + }, + { + "compression_loss": 0.0, + "epoch": 4.5, + "label_loss": 0.4535841941833496, + "learning_rate": 1.2716240083560705e-06, + "loss": 0.2065, + "step": 24926, + "teacher_loss": 0.17906233668327332 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.20025601983070374, + "learning_rate": 1.2707089814987445e-06, + "loss": 0.1659, + "step": 24927, + "teacher_loss": 0.16205769777297974 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.3287839889526367, + "learning_rate": 1.2697942694129006e-06, + "loss": 0.179, + "step": 24928, + "teacher_loss": 0.16240474581718445 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.41799116134643555, + "learning_rate": 1.2688798721195054e-06, + "loss": 0.1954, + "step": 24929, + "teacher_loss": 0.17066457867622375 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.7418502569198608, + "learning_rate": 1.2679657896395292e-06, + "loss": 0.7169, + "step": 24930, + "teacher_loss": 0.7141244411468506 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.2920127511024475, + "learning_rate": 1.2670520219939263e-06, + "loss": 0.1929, + "step": 24931, + "teacher_loss": 0.1819000244140625 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.324868381023407, + "learning_rate": 1.2661385692036454e-06, + "loss": 0.1746, + "step": 24932, + "teacher_loss": 0.15793661773204803 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.19607430696487427, + "learning_rate": 1.2652254312896328e-06, + "loss": 0.1598, + "step": 24933, + "teacher_loss": 0.1557374894618988 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.7852168679237366, + "learning_rate": 1.2643126082728201e-06, + "loss": 0.2512, + "step": 24934, + "teacher_loss": 0.19183233380317688 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.5920917391777039, + "learning_rate": 1.2634001001741375e-06, + "loss": 0.2604, + "step": 24935, + "teacher_loss": 0.2235337793827057 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.5382420420646667, + "learning_rate": 1.2624879070145095e-06, + "loss": 0.2248, + "step": 24936, + "teacher_loss": 0.18995723128318787 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.5238633751869202, + "learning_rate": 1.2615760288148431e-06, + "loss": 0.2416, + "step": 24937, + "teacher_loss": 0.21029233932495117 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.6047776937484741, + "learning_rate": 1.2606644655960497e-06, + "loss": 0.2728, + "step": 24938, + "teacher_loss": 0.23592683672904968 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.39686086773872375, + "learning_rate": 1.2597532173790295e-06, + "loss": 0.1896, + "step": 24939, + "teacher_loss": 0.16662435233592987 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.41148820519447327, + "learning_rate": 1.2588422841846686e-06, + "loss": 0.2069, + "step": 24940, + "teacher_loss": 0.18413375318050385 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.4481838643550873, + "learning_rate": 1.2579316660338575e-06, + "loss": 0.3155, + "step": 24941, + "teacher_loss": 0.3007173240184784 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.478749543428421, + "learning_rate": 1.257021362947474e-06, + "loss": 0.2246, + "step": 24942, + "teacher_loss": 0.19634415209293365 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.4562843441963196, + "learning_rate": 1.2561113749463887e-06, + "loss": 0.1653, + "step": 24943, + "teacher_loss": 0.13296276330947876 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.6366191506385803, + "learning_rate": 1.255201702051456e-06, + "loss": 0.2365, + "step": 24944, + "teacher_loss": 0.19207364320755005 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.20650248229503632, + "learning_rate": 1.2542923442835464e-06, + "loss": 0.2001, + "step": 24945, + "teacher_loss": 0.1993858516216278 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.6839329600334167, + "learning_rate": 1.253383301663501e-06, + "loss": 0.2531, + "step": 24946, + "teacher_loss": 0.2051989585161209 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.34950923919677734, + "learning_rate": 1.2524745742121584e-06, + "loss": 0.1938, + "step": 24947, + "teacher_loss": 0.17648985981941223 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.3094252049922943, + "learning_rate": 1.2515661619503572e-06, + "loss": 0.1858, + "step": 24948, + "teacher_loss": 0.17206791043281555 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.30745187401771545, + "learning_rate": 1.250658064898927e-06, + "loss": 0.1808, + "step": 24949, + "teacher_loss": 0.1667398363351822 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.4365466237068176, + "learning_rate": 1.2497502830786812e-06, + "loss": 0.2565, + "step": 24950, + "teacher_loss": 0.23646265268325806 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.295702189207077, + "learning_rate": 1.2488428165104366e-06, + "loss": 0.1614, + "step": 24951, + "teacher_loss": 0.14642928540706635 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.6158324480056763, + "learning_rate": 1.2479356652149999e-06, + "loss": 0.2632, + "step": 24952, + "teacher_loss": 0.22404597699642181 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.8396400213241577, + "learning_rate": 1.2470288292131693e-06, + "loss": 0.265, + "step": 24953, + "teacher_loss": 0.20110076665878296 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.34785571694374084, + "learning_rate": 1.2461223085257267e-06, + "loss": 0.2189, + "step": 24954, + "teacher_loss": 0.2045457512140274 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.36714956164360046, + "learning_rate": 1.2452161031734705e-06, + "loss": 0.2139, + "step": 24955, + "teacher_loss": 0.1968824714422226 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.6675052642822266, + "learning_rate": 1.2443102131771688e-06, + "loss": 0.254, + "step": 24956, + "teacher_loss": 0.2080499678850174 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.41223758459091187, + "learning_rate": 1.2434046385575903e-06, + "loss": 0.1784, + "step": 24957, + "teacher_loss": 0.15245135128498077 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.29313555359840393, + "learning_rate": 1.2424993793354982e-06, + "loss": 0.2539, + "step": 24958, + "teacher_loss": 0.24952684342861176 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.21495641767978668, + "learning_rate": 1.2415944355316527e-06, + "loss": 0.1533, + "step": 24959, + "teacher_loss": 0.14642101526260376 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.35847973823547363, + "learning_rate": 1.2406898071667922e-06, + "loss": 0.2064, + "step": 24960, + "teacher_loss": 0.18953177332878113 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.5915001630783081, + "learning_rate": 1.2397854942616632e-06, + "loss": 0.2106, + "step": 24961, + "teacher_loss": 0.16825184226036072 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.48203861713409424, + "learning_rate": 1.2388814968369993e-06, + "loss": 0.2108, + "step": 24962, + "teacher_loss": 0.1806488186120987 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.2799552083015442, + "learning_rate": 1.2379778149135223e-06, + "loss": 0.1989, + "step": 24963, + "teacher_loss": 0.18991994857788086 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.3912736177444458, + "learning_rate": 1.237074448511954e-06, + "loss": 0.1498, + "step": 24964, + "teacher_loss": 0.12299604713916779 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.5617449879646301, + "learning_rate": 1.2361713976530076e-06, + "loss": 0.2435, + "step": 24965, + "teacher_loss": 0.2081899344921112 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.2986752986907959, + "learning_rate": 1.2352686623573817e-06, + "loss": 0.1756, + "step": 24966, + "teacher_loss": 0.16195544600486755 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.2902662754058838, + "learning_rate": 1.2343662426457764e-06, + "loss": 0.3264, + "step": 24967, + "teacher_loss": 0.3303978443145752 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.8083678483963013, + "learning_rate": 1.233464138538885e-06, + "loss": 0.3214, + "step": 24968, + "teacher_loss": 0.2672524154186249 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.632462739944458, + "learning_rate": 1.2325623500573863e-06, + "loss": 0.2542, + "step": 24969, + "teacher_loss": 0.21219700574874878 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.34110569953918457, + "learning_rate": 1.231660877221955e-06, + "loss": 0.1982, + "step": 24970, + "teacher_loss": 0.1823311597108841 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.7903971672058105, + "learning_rate": 1.2307597200532584e-06, + "loss": 0.3031, + "step": 24971, + "teacher_loss": 0.24891966581344604 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.24154360592365265, + "learning_rate": 1.229858878571961e-06, + "loss": 0.2124, + "step": 24972, + "teacher_loss": 0.2092091590166092 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.3956694006919861, + "learning_rate": 1.2289583527987137e-06, + "loss": 0.2042, + "step": 24973, + "teacher_loss": 0.18287552893161774 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.25109967589378357, + "learning_rate": 1.2280581427541627e-06, + "loss": 0.1755, + "step": 24974, + "teacher_loss": 0.16714423894882202 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.6288987994194031, + "learning_rate": 1.2271582484589517e-06, + "loss": 0.2506, + "step": 24975, + "teacher_loss": 0.2086101770401001 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.5662125945091248, + "learning_rate": 1.2262586699337042e-06, + "loss": 0.235, + "step": 24976, + "teacher_loss": 0.19816681742668152 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.2918793559074402, + "learning_rate": 1.225359407199052e-06, + "loss": 0.1854, + "step": 24977, + "teacher_loss": 0.17358943819999695 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.4843177795410156, + "learning_rate": 1.2244604602756104e-06, + "loss": 0.2072, + "step": 24978, + "teacher_loss": 0.17643311619758606 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.28618890047073364, + "learning_rate": 1.2235618291839878e-06, + "loss": 0.1511, + "step": 24979, + "teacher_loss": 0.1361401528120041 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.3873986303806305, + "learning_rate": 1.2226635139447912e-06, + "loss": 0.2166, + "step": 24980, + "teacher_loss": 0.19762305915355682 + }, + { + "compression_loss": 0.0, + "epoch": 4.51, + "label_loss": 0.3318076729774475, + "learning_rate": 1.2217655145786122e-06, + "loss": 0.1848, + "step": 24981, + "teacher_loss": 0.1684718132019043 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 1.028064250946045, + "learning_rate": 1.2208678311060379e-06, + "loss": 0.3238, + "step": 24982, + "teacher_loss": 0.24552811682224274 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.4785216152667999, + "learning_rate": 1.2199704635476566e-06, + "loss": 0.2309, + "step": 24983, + "teacher_loss": 0.20336297154426575 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.1786719262599945, + "learning_rate": 1.2190734119240353e-06, + "loss": 0.1757, + "step": 24984, + "teacher_loss": 0.17534658312797546 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.5340508222579956, + "learning_rate": 1.2181766762557429e-06, + "loss": 0.2151, + "step": 24985, + "teacher_loss": 0.17970529198646545 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.5029054880142212, + "learning_rate": 1.2172802565633423e-06, + "loss": 0.3032, + "step": 24986, + "teacher_loss": 0.28099173307418823 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.5457120537757874, + "learning_rate": 1.2163841528673808e-06, + "loss": 0.2324, + "step": 24987, + "teacher_loss": 0.19758740067481995 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.27344438433647156, + "learning_rate": 1.2154883651884035e-06, + "loss": 0.211, + "step": 24988, + "teacher_loss": 0.2040776163339615 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.5257419347763062, + "learning_rate": 1.2145928935469524e-06, + "loss": 0.2108, + "step": 24989, + "teacher_loss": 0.17575670778751373 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.2967720329761505, + "learning_rate": 1.2136977379635545e-06, + "loss": 0.2105, + "step": 24990, + "teacher_loss": 0.20087894797325134 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.34486064314842224, + "learning_rate": 1.2128028984587363e-06, + "loss": 0.2686, + "step": 24991, + "teacher_loss": 0.2601253390312195 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.5057714581489563, + "learning_rate": 1.2119083750530086e-06, + "loss": 0.2994, + "step": 24992, + "teacher_loss": 0.27651315927505493 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.2792707681655884, + "learning_rate": 1.2110141677668845e-06, + "loss": 0.1828, + "step": 24993, + "teacher_loss": 0.1720803678035736 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.4043523371219635, + "learning_rate": 1.210120276620863e-06, + "loss": 0.2457, + "step": 24994, + "teacher_loss": 0.22809180617332458 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.524276614189148, + "learning_rate": 1.2092267016354375e-06, + "loss": 0.2026, + "step": 24995, + "teacher_loss": 0.16690057516098022 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.24703338742256165, + "learning_rate": 1.2083334428311016e-06, + "loss": 0.1558, + "step": 24996, + "teacher_loss": 0.14562749862670898 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.3075560927391052, + "learning_rate": 1.2074405002283256e-06, + "loss": 0.1464, + "step": 24997, + "teacher_loss": 0.12851177155971527 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.7544401288032532, + "learning_rate": 1.2065478738475883e-06, + "loss": 0.3023, + "step": 24998, + "teacher_loss": 0.2520369291305542 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.42518627643585205, + "learning_rate": 1.2056555637093564e-06, + "loss": 0.205, + "step": 24999, + "teacher_loss": 0.1805545687675476 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.48664218187332153, + "learning_rate": 1.2047635698340803e-06, + "loss": 0.3763, + "step": 25000, + "teacher_loss": 0.364020437002182 + }, + { + "epoch": 4.52, + "eval_exact_match": 80.34058656575213, + "eval_f1": 87.71330130857804, + "step": 25000 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.4412286877632141, + "learning_rate": 1.203871892242217e-06, + "loss": 0.2291, + "step": 25001, + "teacher_loss": 0.20557951927185059 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.22568213939666748, + "learning_rate": 1.2029805309542102e-06, + "loss": 0.1726, + "step": 25002, + "teacher_loss": 0.1666538566350937 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.6624314785003662, + "learning_rate": 1.2020894859904934e-06, + "loss": 0.2063, + "step": 25003, + "teacher_loss": 0.1556379199028015 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.2870168089866638, + "learning_rate": 1.2011987573714922e-06, + "loss": 0.1785, + "step": 25004, + "teacher_loss": 0.16645720601081848 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.3117113411426544, + "learning_rate": 1.2003083451176366e-06, + "loss": 0.2123, + "step": 25005, + "teacher_loss": 0.201265349984169 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.22180937230587006, + "learning_rate": 1.199418249249339e-06, + "loss": 0.2, + "step": 25006, + "teacher_loss": 0.19752441346645355 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.399008572101593, + "learning_rate": 1.1985284697869975e-06, + "loss": 0.2327, + "step": 25007, + "teacher_loss": 0.214262455701828 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.5661318302154541, + "learning_rate": 1.1976390067510262e-06, + "loss": 0.2471, + "step": 25008, + "teacher_loss": 0.21164795756340027 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.3104586601257324, + "learning_rate": 1.1967498601618089e-06, + "loss": 0.1788, + "step": 25009, + "teacher_loss": 0.16413713991641998 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.20756185054779053, + "learning_rate": 1.1958610300397321e-06, + "loss": 0.2193, + "step": 25010, + "teacher_loss": 0.22057145833969116 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.21955274045467377, + "learning_rate": 1.194972516405175e-06, + "loss": 0.1711, + "step": 25011, + "teacher_loss": 0.16573864221572876 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.34229278564453125, + "learning_rate": 1.1940843192785094e-06, + "loss": 0.1837, + "step": 25012, + "teacher_loss": 0.1660410314798355 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.5969566106796265, + "learning_rate": 1.1931964386800991e-06, + "loss": 0.3656, + "step": 25013, + "teacher_loss": 0.3399437665939331 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.42115676403045654, + "learning_rate": 1.1923088746302946e-06, + "loss": 0.2115, + "step": 25014, + "teacher_loss": 0.18816983699798584 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.34598255157470703, + "learning_rate": 1.1914216271494544e-06, + "loss": 0.2146, + "step": 25015, + "teacher_loss": 0.20001763105392456 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.47102367877960205, + "learning_rate": 1.1905346962579172e-06, + "loss": 0.2344, + "step": 25016, + "teacher_loss": 0.20816031098365784 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.18410956859588623, + "learning_rate": 1.1896480819760103e-06, + "loss": 0.223, + "step": 25017, + "teacher_loss": 0.22734716534614563 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.42701083421707153, + "learning_rate": 1.1887617843240723e-06, + "loss": 0.1956, + "step": 25018, + "teacher_loss": 0.16984519362449646 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.3775167465209961, + "learning_rate": 1.1878758033224185e-06, + "loss": 0.2078, + "step": 25019, + "teacher_loss": 0.18896019458770752 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.1675519198179245, + "learning_rate": 1.1869901389913596e-06, + "loss": 0.1541, + "step": 25020, + "teacher_loss": 0.15263840556144714 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.36119040846824646, + "learning_rate": 1.186104791351204e-06, + "loss": 0.234, + "step": 25021, + "teacher_loss": 0.2198522388935089 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.5766996145248413, + "learning_rate": 1.1852197604222508e-06, + "loss": 0.3491, + "step": 25022, + "teacher_loss": 0.32385721802711487 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.3426487445831299, + "learning_rate": 1.184335046224787e-06, + "loss": 0.2941, + "step": 25023, + "teacher_loss": 0.2886542081832886 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.33055639266967773, + "learning_rate": 1.1834506487790997e-06, + "loss": 0.2151, + "step": 25024, + "teacher_loss": 0.20221924781799316 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.28464221954345703, + "learning_rate": 1.182566568105466e-06, + "loss": 0.1728, + "step": 25025, + "teacher_loss": 0.1603776514530182 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.3991052210330963, + "learning_rate": 1.1816828042241528e-06, + "loss": 0.191, + "step": 25026, + "teacher_loss": 0.16782169044017792 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.3157411217689514, + "learning_rate": 1.1807993571554226e-06, + "loss": 0.1656, + "step": 25027, + "teacher_loss": 0.14889967441558838 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.3775656223297119, + "learning_rate": 1.179916226919534e-06, + "loss": 0.1853, + "step": 25028, + "teacher_loss": 0.16395828127861023 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.31451690196990967, + "learning_rate": 1.1790334135367292e-06, + "loss": 0.1458, + "step": 25029, + "teacher_loss": 0.12704439461231232 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.7381253242492676, + "learning_rate": 1.1781509170272536e-06, + "loss": 0.2901, + "step": 25030, + "teacher_loss": 0.2403007447719574 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.3375489413738251, + "learning_rate": 1.1772687374113344e-06, + "loss": 0.2574, + "step": 25031, + "teacher_loss": 0.24850626289844513 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.5446094274520874, + "learning_rate": 1.1763868747092039e-06, + "loss": 0.2348, + "step": 25032, + "teacher_loss": 0.20035159587860107 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.3683244287967682, + "learning_rate": 1.1755053289410738e-06, + "loss": 0.1744, + "step": 25033, + "teacher_loss": 0.15283891558647156 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.4383699297904968, + "learning_rate": 1.1746241001271584e-06, + "loss": 0.1674, + "step": 25034, + "teacher_loss": 0.13734087347984314 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.6469532251358032, + "learning_rate": 1.1737431882876632e-06, + "loss": 0.2621, + "step": 25035, + "teacher_loss": 0.21930164098739624 + }, + { + "compression_loss": 0.0, + "epoch": 4.52, + "label_loss": 0.9872376918792725, + "learning_rate": 1.1728625934427818e-06, + "loss": 0.2689, + "step": 25036, + "teacher_loss": 0.1890956461429596 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.8092731237411499, + "learning_rate": 1.1719823156127062e-06, + "loss": 0.3189, + "step": 25037, + "teacher_loss": 0.26438552141189575 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.4695800542831421, + "learning_rate": 1.1711023548176193e-06, + "loss": 0.246, + "step": 25038, + "teacher_loss": 0.2211138904094696 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.7202006578445435, + "learning_rate": 1.1702227110776909e-06, + "loss": 0.3699, + "step": 25039, + "teacher_loss": 0.33100342750549316 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.2920592129230499, + "learning_rate": 1.1693433844130935e-06, + "loss": 0.1784, + "step": 25040, + "teacher_loss": 0.16577807068824768 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.7296040058135986, + "learning_rate": 1.1684643748439877e-06, + "loss": 0.3363, + "step": 25041, + "teacher_loss": 0.2926402688026428 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.2695853114128113, + "learning_rate": 1.1675856823905224e-06, + "loss": 0.2339, + "step": 25042, + "teacher_loss": 0.22995613515377045 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.3277488648891449, + "learning_rate": 1.1667073070728462e-06, + "loss": 0.211, + "step": 25043, + "teacher_loss": 0.19802260398864746 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.46200937032699585, + "learning_rate": 1.1658292489110967e-06, + "loss": 0.1798, + "step": 25044, + "teacher_loss": 0.14841556549072266 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.19668267667293549, + "learning_rate": 1.1649515079254058e-06, + "loss": 0.1735, + "step": 25045, + "teacher_loss": 0.17093007266521454 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.3090388774871826, + "learning_rate": 1.1640740841358978e-06, + "loss": 0.1904, + "step": 25046, + "teacher_loss": 0.17718321084976196 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.5538351535797119, + "learning_rate": 1.1631969775626877e-06, + "loss": 0.2283, + "step": 25047, + "teacher_loss": 0.192184716463089 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.3783976435661316, + "learning_rate": 1.1623201882258866e-06, + "loss": 0.1613, + "step": 25048, + "teacher_loss": 0.13714444637298584 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.2524876594543457, + "learning_rate": 1.1614437161455965e-06, + "loss": 0.2038, + "step": 25049, + "teacher_loss": 0.19834274053573608 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.7641234993934631, + "learning_rate": 1.1605675613419115e-06, + "loss": 0.329, + "step": 25050, + "teacher_loss": 0.2806835472583771 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.32838648557662964, + "learning_rate": 1.1596917238349202e-06, + "loss": 0.1898, + "step": 25051, + "teacher_loss": 0.17444922029972076 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.37490326166152954, + "learning_rate": 1.1588162036447003e-06, + "loss": 0.189, + "step": 25052, + "teacher_loss": 0.16836583614349365 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.12214823812246323, + "learning_rate": 1.1579410007913304e-06, + "loss": 0.126, + "step": 25053, + "teacher_loss": 0.12637659907341003 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.33858349919319153, + "learning_rate": 1.1570661152948697e-06, + "loss": 0.2241, + "step": 25054, + "teacher_loss": 0.21137921512126923 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.4753701090812683, + "learning_rate": 1.1561915471753786e-06, + "loss": 0.2254, + "step": 25055, + "teacher_loss": 0.1975809931755066 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.18832360208034515, + "learning_rate": 1.155317296452913e-06, + "loss": 0.1843, + "step": 25056, + "teacher_loss": 0.18381932377815247 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.3490745723247528, + "learning_rate": 1.1544433631475116e-06, + "loss": 0.2244, + "step": 25057, + "teacher_loss": 0.21055419743061066 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.4071692228317261, + "learning_rate": 1.1535697472792118e-06, + "loss": 0.1658, + "step": 25058, + "teacher_loss": 0.13894259929656982 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.3694760203361511, + "learning_rate": 1.1526964488680457e-06, + "loss": 0.2253, + "step": 25059, + "teacher_loss": 0.20922821760177612 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.4758175015449524, + "learning_rate": 1.151823467934031e-06, + "loss": 0.1901, + "step": 25060, + "teacher_loss": 0.15833674371242523 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.5689733028411865, + "learning_rate": 1.1509508044971867e-06, + "loss": 0.2055, + "step": 25061, + "teacher_loss": 0.16513517498970032 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.17930416762828827, + "learning_rate": 1.1500784585775215e-06, + "loss": 0.1716, + "step": 25062, + "teacher_loss": 0.1707848459482193 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.2228250503540039, + "learning_rate": 1.1492064301950295e-06, + "loss": 0.2576, + "step": 25063, + "teacher_loss": 0.2614786624908447 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.28050899505615234, + "learning_rate": 1.1483347193697063e-06, + "loss": 0.2504, + "step": 25064, + "teacher_loss": 0.24701324105262756 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.21907195448875427, + "learning_rate": 1.1474633261215428e-06, + "loss": 0.1548, + "step": 25065, + "teacher_loss": 0.14768186211585999 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.5917607545852661, + "learning_rate": 1.1465922504705128e-06, + "loss": 0.2096, + "step": 25066, + "teacher_loss": 0.16710935533046722 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.23097911477088928, + "learning_rate": 1.145721492436582e-06, + "loss": 0.1686, + "step": 25067, + "teacher_loss": 0.16168928146362305 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.5737055540084839, + "learning_rate": 1.1448510520397264e-06, + "loss": 0.3113, + "step": 25068, + "teacher_loss": 0.28211894631385803 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.23001497983932495, + "learning_rate": 1.143980929299895e-06, + "loss": 0.1667, + "step": 25069, + "teacher_loss": 0.1596429944038391 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.5855672359466553, + "learning_rate": 1.1431111242370363e-06, + "loss": 0.2144, + "step": 25070, + "teacher_loss": 0.17311370372772217 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.44978660345077515, + "learning_rate": 1.1422416368710947e-06, + "loss": 0.1688, + "step": 25071, + "teacher_loss": 0.13755042850971222 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.7153927683830261, + "learning_rate": 1.1413724672220077e-06, + "loss": 0.264, + "step": 25072, + "teacher_loss": 0.2138305902481079 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.5571683645248413, + "learning_rate": 1.140503615309696e-06, + "loss": 0.2274, + "step": 25073, + "teacher_loss": 0.19073250889778137 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.41072770953178406, + "learning_rate": 1.1396350811540852e-06, + "loss": 0.1971, + "step": 25074, + "teacher_loss": 0.17338716983795166 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.4944639205932617, + "learning_rate": 1.1387668647750875e-06, + "loss": 0.2496, + "step": 25075, + "teacher_loss": 0.22236031293869019 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.32629427313804626, + "learning_rate": 1.1378989661926077e-06, + "loss": 0.1456, + "step": 25076, + "teacher_loss": 0.1255749762058258 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.6234003305435181, + "learning_rate": 1.137031385426539e-06, + "loss": 0.2217, + "step": 25077, + "teacher_loss": 0.17709439992904663 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.7568278312683105, + "learning_rate": 1.1361641224967811e-06, + "loss": 0.2334, + "step": 25078, + "teacher_loss": 0.17519566416740417 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.2586619257926941, + "learning_rate": 1.1352971774232162e-06, + "loss": 0.1707, + "step": 25079, + "teacher_loss": 0.16094785928726196 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.42158079147338867, + "learning_rate": 1.1344305502257135e-06, + "loss": 0.2289, + "step": 25080, + "teacher_loss": 0.20748449862003326 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.47270193696022034, + "learning_rate": 1.1335642409241486e-06, + "loss": 0.2528, + "step": 25081, + "teacher_loss": 0.2283683717250824 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.32642054557800293, + "learning_rate": 1.1326982495383842e-06, + "loss": 0.1876, + "step": 25082, + "teacher_loss": 0.1722283959388733 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.31233739852905273, + "learning_rate": 1.131832576088271e-06, + "loss": 0.2188, + "step": 25083, + "teacher_loss": 0.2083582580089569 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.267640084028244, + "learning_rate": 1.1309672205936561e-06, + "loss": 0.1698, + "step": 25084, + "teacher_loss": 0.1588984578847885 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.7057147026062012, + "learning_rate": 1.1301021830743858e-06, + "loss": 0.2451, + "step": 25085, + "teacher_loss": 0.19390638172626495 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.4710800051689148, + "learning_rate": 1.1292374635502838e-06, + "loss": 0.1613, + "step": 25086, + "teacher_loss": 0.12689335644245148 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.347565621137619, + "learning_rate": 1.1283730620411814e-06, + "loss": 0.1565, + "step": 25087, + "teacher_loss": 0.13525059819221497 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.5915053486824036, + "learning_rate": 1.127508978566899e-06, + "loss": 0.2188, + "step": 25088, + "teacher_loss": 0.17741575837135315 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.4275597929954529, + "learning_rate": 1.1266452131472393e-06, + "loss": 0.1728, + "step": 25089, + "teacher_loss": 0.14450258016586304 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.7749910354614258, + "learning_rate": 1.1257817658020114e-06, + "loss": 0.2719, + "step": 25090, + "teacher_loss": 0.21605314314365387 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.5343226790428162, + "learning_rate": 1.1249186365510128e-06, + "loss": 0.2768, + "step": 25091, + "teacher_loss": 0.2481517195701599 + }, + { + "compression_loss": 0.0, + "epoch": 4.53, + "label_loss": 0.2786710560321808, + "learning_rate": 1.1240558254140277e-06, + "loss": 0.1726, + "step": 25092, + "teacher_loss": 0.16082261502742767 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.3009941875934601, + "learning_rate": 1.1231933324108417e-06, + "loss": 0.1556, + "step": 25093, + "teacher_loss": 0.13947902619838715 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.16383624076843262, + "learning_rate": 1.1223311575612261e-06, + "loss": 0.1959, + "step": 25094, + "teacher_loss": 0.19949132204055786 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 1.050405502319336, + "learning_rate": 1.1214693008849531e-06, + "loss": 0.3097, + "step": 25095, + "teacher_loss": 0.22742529213428497 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.44941550493240356, + "learning_rate": 1.1206077624017752e-06, + "loss": 0.2111, + "step": 25096, + "teacher_loss": 0.18467360734939575 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.27394360303878784, + "learning_rate": 1.11974654213145e-06, + "loss": 0.2215, + "step": 25097, + "teacher_loss": 0.21564587950706482 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.3398643732070923, + "learning_rate": 1.1188856400937219e-06, + "loss": 0.1948, + "step": 25098, + "teacher_loss": 0.17869465053081512 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 1.252565860748291, + "learning_rate": 1.1180250563083282e-06, + "loss": 0.2831, + "step": 25099, + "teacher_loss": 0.1753694713115692 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.18907268345355988, + "learning_rate": 1.1171647907949983e-06, + "loss": 0.1785, + "step": 25100, + "teacher_loss": 0.17733994126319885 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.36895787715911865, + "learning_rate": 1.1163048435734596e-06, + "loss": 0.252, + "step": 25101, + "teacher_loss": 0.23903557658195496 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.4971223473548889, + "learning_rate": 1.1154452146634232e-06, + "loss": 0.2092, + "step": 25102, + "teacher_loss": 0.177154541015625 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.659134030342102, + "learning_rate": 1.1145859040846012e-06, + "loss": 0.2489, + "step": 25103, + "teacher_loss": 0.2033492624759674 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.6527400016784668, + "learning_rate": 1.1137269118566935e-06, + "loss": 0.2317, + "step": 25104, + "teacher_loss": 0.18491825461387634 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.48554086685180664, + "learning_rate": 1.112868237999392e-06, + "loss": 0.2122, + "step": 25105, + "teacher_loss": 0.18184050917625427 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.5579530000686646, + "learning_rate": 1.1120098825323898e-06, + "loss": 0.2251, + "step": 25106, + "teacher_loss": 0.18811756372451782 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.7567838430404663, + "learning_rate": 1.111151845475361e-06, + "loss": 0.2668, + "step": 25107, + "teacher_loss": 0.2123786360025406 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.36765024065971375, + "learning_rate": 1.1102941268479783e-06, + "loss": 0.1497, + "step": 25108, + "teacher_loss": 0.1255100965499878 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.999617874622345, + "learning_rate": 1.1094367266699107e-06, + "loss": 0.3132, + "step": 25109, + "teacher_loss": 0.23692744970321655 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.642194390296936, + "learning_rate": 1.1085796449608093e-06, + "loss": 0.2476, + "step": 25110, + "teacher_loss": 0.20377424359321594 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.5008236765861511, + "learning_rate": 1.1077228817403302e-06, + "loss": 0.2679, + "step": 25111, + "teacher_loss": 0.2420269250869751 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.3790559470653534, + "learning_rate": 1.106866437028114e-06, + "loss": 0.2438, + "step": 25112, + "teacher_loss": 0.22879081964492798 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.2707124650478363, + "learning_rate": 1.1060103108437969e-06, + "loss": 0.1901, + "step": 25113, + "teacher_loss": 0.18117444217205048 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.2220154106616974, + "learning_rate": 1.1051545032070081e-06, + "loss": 0.1842, + "step": 25114, + "teacher_loss": 0.1799495667219162 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.4881695508956909, + "learning_rate": 1.1042990141373654e-06, + "loss": 0.1943, + "step": 25115, + "teacher_loss": 0.1616876721382141 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.5080932378768921, + "learning_rate": 1.1034438436544863e-06, + "loss": 0.1968, + "step": 25116, + "teacher_loss": 0.16225430369377136 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.4047453999519348, + "learning_rate": 1.1025889917779735e-06, + "loss": 0.2475, + "step": 25117, + "teacher_loss": 0.2300504446029663 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.38214755058288574, + "learning_rate": 1.1017344585274297e-06, + "loss": 0.1934, + "step": 25118, + "teacher_loss": 0.17240500450134277 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.16782429814338684, + "learning_rate": 1.1008802439224474e-06, + "loss": 0.1559, + "step": 25119, + "teacher_loss": 0.1545776128768921 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.3021412789821625, + "learning_rate": 1.1000263479826078e-06, + "loss": 0.1491, + "step": 25120, + "teacher_loss": 0.13211965560913086 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.4685192108154297, + "learning_rate": 1.0991727707274885e-06, + "loss": 0.209, + "step": 25121, + "teacher_loss": 0.18015766143798828 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.6349761486053467, + "learning_rate": 1.0983195121766637e-06, + "loss": 0.1997, + "step": 25122, + "teacher_loss": 0.15133631229400635 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.5472557544708252, + "learning_rate": 1.0974665723496914e-06, + "loss": 0.2031, + "step": 25123, + "teacher_loss": 0.16482828557491302 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.1830618679523468, + "learning_rate": 1.0966139512661273e-06, + "loss": 0.1837, + "step": 25124, + "teacher_loss": 0.18378564715385437 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.42108988761901855, + "learning_rate": 1.0957616489455242e-06, + "loss": 0.2916, + "step": 25125, + "teacher_loss": 0.2772218585014343 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.14607976377010345, + "learning_rate": 1.0949096654074198e-06, + "loss": 0.1428, + "step": 25126, + "teacher_loss": 0.14244094491004944 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 1.0830754041671753, + "learning_rate": 1.0940580006713403e-06, + "loss": 0.3867, + "step": 25127, + "teacher_loss": 0.30932193994522095 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.4186926484107971, + "learning_rate": 1.0932066547568248e-06, + "loss": 0.2469, + "step": 25128, + "teacher_loss": 0.2277604341506958 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.3768858015537262, + "learning_rate": 1.0923556276833862e-06, + "loss": 0.1853, + "step": 25129, + "teacher_loss": 0.16398711502552032 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.4666866958141327, + "learning_rate": 1.0915049194705306e-06, + "loss": 0.2756, + "step": 25130, + "teacher_loss": 0.25437361001968384 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.4775325059890747, + "learning_rate": 1.0906545301377725e-06, + "loss": 0.2133, + "step": 25131, + "teacher_loss": 0.18394434452056885 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.24803099036216736, + "learning_rate": 1.0898044597046043e-06, + "loss": 0.1852, + "step": 25132, + "teacher_loss": 0.17826637625694275 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.6650961637496948, + "learning_rate": 1.0889547081905105e-06, + "loss": 0.2346, + "step": 25133, + "teacher_loss": 0.18672078847885132 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.11538488417863846, + "learning_rate": 1.088105275614979e-06, + "loss": 0.1212, + "step": 25134, + "teacher_loss": 0.12179238349199295 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.30577942728996277, + "learning_rate": 1.0872561619974858e-06, + "loss": 0.2014, + "step": 25135, + "teacher_loss": 0.18981948494911194 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.5678088665008545, + "learning_rate": 1.086407367357497e-06, + "loss": 0.3802, + "step": 25136, + "teacher_loss": 0.3593955338001251 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.5712772607803345, + "learning_rate": 1.085558891714467e-06, + "loss": 0.2317, + "step": 25137, + "teacher_loss": 0.19399836659431458 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.6207983493804932, + "learning_rate": 1.084710735087857e-06, + "loss": 0.1846, + "step": 25138, + "teacher_loss": 0.13608869910240173 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.3965565860271454, + "learning_rate": 1.0838628974971115e-06, + "loss": 0.243, + "step": 25139, + "teacher_loss": 0.22591203451156616 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.38403910398483276, + "learning_rate": 1.0830153789616614e-06, + "loss": 0.2249, + "step": 25140, + "teacher_loss": 0.20726221799850464 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.5643201470375061, + "learning_rate": 1.0821681795009498e-06, + "loss": 0.2601, + "step": 25141, + "teacher_loss": 0.22627192735671997 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.6626375913619995, + "learning_rate": 1.0813212991343924e-06, + "loss": 0.2416, + "step": 25142, + "teacher_loss": 0.19484224915504456 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.9159227013587952, + "learning_rate": 1.0804747378814039e-06, + "loss": 0.442, + "step": 25143, + "teacher_loss": 0.38931912183761597 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.09402231127023697, + "learning_rate": 1.079628495761399e-06, + "loss": 0.1935, + "step": 25144, + "teacher_loss": 0.2045225203037262 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.6535812616348267, + "learning_rate": 1.0787825727937783e-06, + "loss": 0.2397, + "step": 25145, + "teacher_loss": 0.19369667768478394 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.6446030139923096, + "learning_rate": 1.0779369689979335e-06, + "loss": 0.2019, + "step": 25146, + "teacher_loss": 0.1527140736579895 + }, + { + "compression_loss": 0.0, + "epoch": 4.54, + "label_loss": 0.6177886128425598, + "learning_rate": 1.0770916843932538e-06, + "loss": 0.3612, + "step": 25147, + "teacher_loss": 0.3326902389526367 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.24591708183288574, + "learning_rate": 1.0762467189991205e-06, + "loss": 0.1347, + "step": 25148, + "teacher_loss": 0.12238724529743195 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.39661920070648193, + "learning_rate": 1.0754020728349011e-06, + "loss": 0.1979, + "step": 25149, + "teacher_loss": 0.17583858966827393 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.5616382956504822, + "learning_rate": 1.0745577459199656e-06, + "loss": 0.248, + "step": 25150, + "teacher_loss": 0.21316689252853394 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.4333067238330841, + "learning_rate": 1.0737137382736717e-06, + "loss": 0.2972, + "step": 25151, + "teacher_loss": 0.2820393741130829 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.2262304425239563, + "learning_rate": 1.0728700499153671e-06, + "loss": 0.1616, + "step": 25152, + "teacher_loss": 0.15446409583091736 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.4539685845375061, + "learning_rate": 1.072026680864398e-06, + "loss": 0.2018, + "step": 25153, + "teacher_loss": 0.17375105619430542 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.4044560492038727, + "learning_rate": 1.0711836311400974e-06, + "loss": 0.1953, + "step": 25154, + "teacher_loss": 0.1720491349697113 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.3996172547340393, + "learning_rate": 1.0703409007617965e-06, + "loss": 0.3368, + "step": 25155, + "teacher_loss": 0.32979950308799744 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.2790040373802185, + "learning_rate": 1.0694984897488146e-06, + "loss": 0.2074, + "step": 25156, + "teacher_loss": 0.1994135081768036 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.45737403631210327, + "learning_rate": 1.068656398120465e-06, + "loss": 0.254, + "step": 25157, + "teacher_loss": 0.23138466477394104 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.9622973203659058, + "learning_rate": 1.0678146258960602e-06, + "loss": 0.2801, + "step": 25158, + "teacher_loss": 0.204318106174469 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.4137294590473175, + "learning_rate": 1.06697317309489e-06, + "loss": 0.2289, + "step": 25159, + "teacher_loss": 0.20839133858680725 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.558612585067749, + "learning_rate": 1.0661320397362539e-06, + "loss": 0.2709, + "step": 25160, + "teacher_loss": 0.23889321088790894 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.4955894947052002, + "learning_rate": 1.0652912258394366e-06, + "loss": 0.2128, + "step": 25161, + "teacher_loss": 0.18142831325531006 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.671654462814331, + "learning_rate": 1.064450731423709e-06, + "loss": 0.2141, + "step": 25162, + "teacher_loss": 0.16328689455986023 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.21502867341041565, + "learning_rate": 1.063610556508346e-06, + "loss": 0.1517, + "step": 25163, + "teacher_loss": 0.14466698467731476 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.3167605698108673, + "learning_rate": 1.062770701112612e-06, + "loss": 0.1566, + "step": 25164, + "teacher_loss": 0.1387665867805481 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.8184818029403687, + "learning_rate": 1.0619311652557584e-06, + "loss": 0.2098, + "step": 25165, + "teacher_loss": 0.14219698309898376 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.3001149296760559, + "learning_rate": 1.0610919489570364e-06, + "loss": 0.1871, + "step": 25166, + "teacher_loss": 0.1745389848947525 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.27030065655708313, + "learning_rate": 1.0602530522356825e-06, + "loss": 0.2793, + "step": 25167, + "teacher_loss": 0.280300498008728 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.2223224937915802, + "learning_rate": 1.0594144751109324e-06, + "loss": 0.1328, + "step": 25168, + "teacher_loss": 0.12287843227386475 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.7528339624404907, + "learning_rate": 1.0585762176020148e-06, + "loss": 0.2405, + "step": 25169, + "teacher_loss": 0.1835213005542755 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.30148136615753174, + "learning_rate": 1.0577382797281437e-06, + "loss": 0.1946, + "step": 25170, + "teacher_loss": 0.18274180591106415 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 1.0508067607879639, + "learning_rate": 1.0569006615085325e-06, + "loss": 0.3364, + "step": 25171, + "teacher_loss": 0.25701552629470825 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.44432538747787476, + "learning_rate": 1.0560633629623872e-06, + "loss": 0.2628, + "step": 25172, + "teacher_loss": 0.24266645312309265 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.2835889458656311, + "learning_rate": 1.055226384108901e-06, + "loss": 0.2546, + "step": 25173, + "teacher_loss": 0.25141146779060364 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.4015403389930725, + "learning_rate": 1.0543897249672667e-06, + "loss": 0.2117, + "step": 25174, + "teacher_loss": 0.1905989944934845 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.7630246877670288, + "learning_rate": 1.053553385556666e-06, + "loss": 0.2705, + "step": 25175, + "teacher_loss": 0.215753972530365 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.5852357149124146, + "learning_rate": 1.052717365896273e-06, + "loss": 0.226, + "step": 25176, + "teacher_loss": 0.18610183894634247 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.19653409719467163, + "learning_rate": 1.051881666005251e-06, + "loss": 0.185, + "step": 25177, + "teacher_loss": 0.18367037177085876 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.883455753326416, + "learning_rate": 1.0510462859027648e-06, + "loss": 0.2655, + "step": 25178, + "teacher_loss": 0.19685232639312744 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.2505333423614502, + "learning_rate": 1.0502112256079688e-06, + "loss": 0.1574, + "step": 25179, + "teacher_loss": 0.1470758616924286 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.5195713043212891, + "learning_rate": 1.049376485140003e-06, + "loss": 0.1961, + "step": 25180, + "teacher_loss": 0.1601724624633789 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.3929944932460785, + "learning_rate": 1.0485420645180082e-06, + "loss": 0.1848, + "step": 25181, + "teacher_loss": 0.16162265837192535 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.5909929275512695, + "learning_rate": 1.047707963761118e-06, + "loss": 0.3129, + "step": 25182, + "teacher_loss": 0.2819991707801819 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.0995166227221489, + "learning_rate": 1.0468741828884503e-06, + "loss": 0.1044, + "step": 25183, + "teacher_loss": 0.10490189492702484 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.44798195362091064, + "learning_rate": 1.0460407219191248e-06, + "loss": 0.1991, + "step": 25184, + "teacher_loss": 0.17145895957946777 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.279528945684433, + "learning_rate": 1.0452075808722512e-06, + "loss": 0.256, + "step": 25185, + "teacher_loss": 0.25340765714645386 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.21935133635997772, + "learning_rate": 1.0443747597669256e-06, + "loss": 0.1607, + "step": 25186, + "teacher_loss": 0.15415659546852112 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.23632782697677612, + "learning_rate": 1.0435422586222465e-06, + "loss": 0.164, + "step": 25187, + "teacher_loss": 0.15593472123146057 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.19084838032722473, + "learning_rate": 1.0427100774573034e-06, + "loss": 0.1279, + "step": 25188, + "teacher_loss": 0.12093309313058853 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.5360528826713562, + "learning_rate": 1.041878216291171e-06, + "loss": 0.2632, + "step": 25189, + "teacher_loss": 0.23288151621818542 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.5119979381561279, + "learning_rate": 1.0410466751429176e-06, + "loss": 0.2711, + "step": 25190, + "teacher_loss": 0.2443457841873169 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.27491700649261475, + "learning_rate": 1.0402154540316178e-06, + "loss": 0.3808, + "step": 25191, + "teacher_loss": 0.3925568759441376 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.23057198524475098, + "learning_rate": 1.039384552976323e-06, + "loss": 0.1879, + "step": 25192, + "teacher_loss": 0.1831241101026535 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.34249866008758545, + "learning_rate": 1.038553971996083e-06, + "loss": 0.1674, + "step": 25193, + "teacher_loss": 0.14799395203590393 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.3817611336708069, + "learning_rate": 1.0377237111099425e-06, + "loss": 0.2242, + "step": 25194, + "teacher_loss": 0.20665612816810608 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.8364651203155518, + "learning_rate": 1.036893770336938e-06, + "loss": 0.3327, + "step": 25195, + "teacher_loss": 0.2767726182937622 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.5810595154762268, + "learning_rate": 1.0360641496960927e-06, + "loss": 0.4188, + "step": 25196, + "teacher_loss": 0.4007996916770935 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.3370826840400696, + "learning_rate": 1.0352348492064312e-06, + "loss": 0.2211, + "step": 25197, + "teacher_loss": 0.2081608772277832 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.20390617847442627, + "learning_rate": 1.0344058688869683e-06, + "loss": 0.1645, + "step": 25198, + "teacher_loss": 0.16011005640029907 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.2785056531429291, + "learning_rate": 1.0335772087567075e-06, + "loss": 0.1424, + "step": 25199, + "teacher_loss": 0.1272660493850708 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.2310413122177124, + "learning_rate": 1.032748868834643e-06, + "loss": 0.2409, + "step": 25200, + "teacher_loss": 0.24204185605049133 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.2546086609363556, + "learning_rate": 1.0319208491397752e-06, + "loss": 0.1736, + "step": 25201, + "teacher_loss": 0.16459433734416962 + }, + { + "compression_loss": 0.0, + "epoch": 4.55, + "label_loss": 0.4630472660064697, + "learning_rate": 1.0310931496910853e-06, + "loss": 0.2595, + "step": 25202, + "teacher_loss": 0.23683932423591614 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.9697644710540771, + "learning_rate": 1.0302657705075447e-06, + "loss": 0.2937, + "step": 25203, + "teacher_loss": 0.21855421364307404 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.17329362034797668, + "learning_rate": 1.0294387116081283e-06, + "loss": 0.1571, + "step": 25204, + "teacher_loss": 0.15532107651233673 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 1.0363295078277588, + "learning_rate": 1.0286119730117976e-06, + "loss": 0.2622, + "step": 25205, + "teacher_loss": 0.1761317253112793 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.7478715777397156, + "learning_rate": 1.0277855547375025e-06, + "loss": 0.2511, + "step": 25206, + "teacher_loss": 0.19586652517318726 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.4772930443286896, + "learning_rate": 1.0269594568041961e-06, + "loss": 0.1979, + "step": 25207, + "teacher_loss": 0.16687631607055664 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.662340521812439, + "learning_rate": 1.0261336792308168e-06, + "loss": 0.2659, + "step": 25208, + "teacher_loss": 0.2218572199344635 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.6691567897796631, + "learning_rate": 1.0253082220362942e-06, + "loss": 0.2393, + "step": 25209, + "teacher_loss": 0.1915469765663147 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.340048611164093, + "learning_rate": 1.0244830852395564e-06, + "loss": 0.2408, + "step": 25210, + "teacher_loss": 0.22972318530082703 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.6415939331054688, + "learning_rate": 1.0236582688595219e-06, + "loss": 0.2259, + "step": 25211, + "teacher_loss": 0.179696187376976 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.27981430292129517, + "learning_rate": 1.022833772915099e-06, + "loss": 0.2036, + "step": 25212, + "teacher_loss": 0.19510118663311005 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.36495888233184814, + "learning_rate": 1.022009597425192e-06, + "loss": 0.1686, + "step": 25213, + "teacher_loss": 0.14675463736057281 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.45230725407600403, + "learning_rate": 1.0211857424086979e-06, + "loss": 0.2597, + "step": 25214, + "teacher_loss": 0.23835399746894836 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.5013591051101685, + "learning_rate": 1.0203622078845032e-06, + "loss": 0.2377, + "step": 25215, + "teacher_loss": 0.2083880603313446 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.31093183159828186, + "learning_rate": 1.019538993871491e-06, + "loss": 0.1468, + "step": 25216, + "teacher_loss": 0.12855631113052368 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.5743951797485352, + "learning_rate": 1.0187161003885315e-06, + "loss": 0.2661, + "step": 25217, + "teacher_loss": 0.2318832278251648 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.40434110164642334, + "learning_rate": 1.0178935274544975e-06, + "loss": 0.1918, + "step": 25218, + "teacher_loss": 0.1682206392288208 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.46418869495391846, + "learning_rate": 1.017071275088241e-06, + "loss": 0.266, + "step": 25219, + "teacher_loss": 0.24398571252822876 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.33546945452690125, + "learning_rate": 1.0162493433086167e-06, + "loss": 0.205, + "step": 25220, + "teacher_loss": 0.19055090844631195 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.6674797534942627, + "learning_rate": 1.015427732134473e-06, + "loss": 0.2149, + "step": 25221, + "teacher_loss": 0.16464710235595703 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.2732957899570465, + "learning_rate": 1.0146064415846412e-06, + "loss": 0.2061, + "step": 25222, + "teacher_loss": 0.19866423308849335 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.35575833916664124, + "learning_rate": 1.0137854716779532e-06, + "loss": 0.2383, + "step": 25223, + "teacher_loss": 0.22522330284118652 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 1.0614213943481445, + "learning_rate": 1.0129648224332321e-06, + "loss": 0.3783, + "step": 25224, + "teacher_loss": 0.30237245559692383 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.5566887855529785, + "learning_rate": 1.0121444938692914e-06, + "loss": 0.2445, + "step": 25225, + "teacher_loss": 0.2097589522600174 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.3513622283935547, + "learning_rate": 1.0113244860049425e-06, + "loss": 0.1721, + "step": 25226, + "teacher_loss": 0.15221622586250305 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.5891441106796265, + "learning_rate": 1.0105047988589805e-06, + "loss": 0.3517, + "step": 25227, + "teacher_loss": 0.32531094551086426 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.27161553502082825, + "learning_rate": 1.0096854324502003e-06, + "loss": 0.1852, + "step": 25228, + "teacher_loss": 0.17560729384422302 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.6531555652618408, + "learning_rate": 1.0088663867973903e-06, + "loss": 0.2342, + "step": 25229, + "teacher_loss": 0.1876084953546524 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.3394317626953125, + "learning_rate": 1.0080476619193235e-06, + "loss": 0.1726, + "step": 25230, + "teacher_loss": 0.15411494672298431 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.26204758882522583, + "learning_rate": 1.0072292578347753e-06, + "loss": 0.1699, + "step": 25231, + "teacher_loss": 0.1596796065568924 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.939587414264679, + "learning_rate": 1.0064111745625088e-06, + "loss": 0.3059, + "step": 25232, + "teacher_loss": 0.23550009727478027 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.326665461063385, + "learning_rate": 1.0055934121212773e-06, + "loss": 0.1912, + "step": 25233, + "teacher_loss": 0.17610543966293335 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.5514337420463562, + "learning_rate": 1.004775970529831e-06, + "loss": 0.2334, + "step": 25234, + "teacher_loss": 0.1980901062488556 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.4697257876396179, + "learning_rate": 1.0039588498069147e-06, + "loss": 0.1951, + "step": 25235, + "teacher_loss": 0.16458944976329803 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.48229724168777466, + "learning_rate": 1.0031420499712568e-06, + "loss": 0.2314, + "step": 25236, + "teacher_loss": 0.20357276499271393 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.1518930196762085, + "learning_rate": 1.0023255710415874e-06, + "loss": 0.137, + "step": 25237, + "teacher_loss": 0.1352979987859726 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.25666019320487976, + "learning_rate": 1.001509413036628e-06, + "loss": 0.2107, + "step": 25238, + "teacher_loss": 0.2056483030319214 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.7127546072006226, + "learning_rate": 1.0006935759750873e-06, + "loss": 0.1945, + "step": 25239, + "teacher_loss": 0.13690584897994995 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.601432740688324, + "learning_rate": 9.998780598756685e-07, + "loss": 0.2148, + "step": 25240, + "teacher_loss": 0.17184464633464813 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.4649273753166199, + "learning_rate": 9.990628647570732e-07, + "loss": 0.2223, + "step": 25241, + "teacher_loss": 0.1953580379486084 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 1.0118002891540527, + "learning_rate": 9.9824799063799e-07, + "loss": 0.3141, + "step": 25242, + "teacher_loss": 0.23654355108737946 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.4516458511352539, + "learning_rate": 9.974334375370985e-07, + "loss": 0.1994, + "step": 25243, + "teacher_loss": 0.17137081921100616 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.3629573881626129, + "learning_rate": 9.96619205473076e-07, + "loss": 0.2502, + "step": 25244, + "teacher_loss": 0.23767578601837158 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.6243861317634583, + "learning_rate": 9.95805294464594e-07, + "loss": 0.2635, + "step": 25245, + "teacher_loss": 0.223362997174263 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.2881556749343872, + "learning_rate": 9.949917045303076e-07, + "loss": 0.1382, + "step": 25246, + "teacher_loss": 0.12151715159416199 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.5381479263305664, + "learning_rate": 9.941784356888717e-07, + "loss": 0.2254, + "step": 25247, + "teacher_loss": 0.19066426157951355 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.41888338327407837, + "learning_rate": 9.933654879589365e-07, + "loss": 0.2088, + "step": 25248, + "teacher_loss": 0.18550518155097961 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.8377820253372192, + "learning_rate": 9.925528613591356e-07, + "loss": 0.2733, + "step": 25249, + "teacher_loss": 0.21060319244861603 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.2254316806793213, + "learning_rate": 9.917405559080956e-07, + "loss": 0.1866, + "step": 25250, + "teacher_loss": 0.18230530619621277 + }, + { + "epoch": 4.56, + "eval_exact_match": 80.58656575212866, + "eval_f1": 87.72573280071668, + "step": 25250 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.4477003216743469, + "learning_rate": 9.909285716244514e-07, + "loss": 0.1773, + "step": 25251, + "teacher_loss": 0.1472342163324356 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.3066934645175934, + "learning_rate": 9.90116908526812e-07, + "loss": 0.1576, + "step": 25252, + "teacher_loss": 0.14101171493530273 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.7134931087493896, + "learning_rate": 9.89305566633787e-07, + "loss": 0.2304, + "step": 25253, + "teacher_loss": 0.17670965194702148 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.25202542543411255, + "learning_rate": 9.8849454596398e-07, + "loss": 0.1588, + "step": 25254, + "teacher_loss": 0.14839942753314972 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.5501688122749329, + "learning_rate": 9.87683846535986e-07, + "loss": 0.2076, + "step": 25255, + "teacher_loss": 0.16949230432510376 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.5021957159042358, + "learning_rate": 9.868734683683873e-07, + "loss": 0.2761, + "step": 25256, + "teacher_loss": 0.25101611018180847 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.46302330493927, + "learning_rate": 9.860634114797668e-07, + "loss": 0.2776, + "step": 25257, + "teacher_loss": 0.25701189041137695 + }, + { + "compression_loss": 0.0, + "epoch": 4.56, + "label_loss": 0.8627578616142273, + "learning_rate": 9.852536758886999e-07, + "loss": 0.3173, + "step": 25258, + "teacher_loss": 0.25668299198150635 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.20451778173446655, + "learning_rate": 9.844442616137467e-07, + "loss": 0.2113, + "step": 25259, + "teacher_loss": 0.21207863092422485 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.2381524294614792, + "learning_rate": 9.836351686734607e-07, + "loss": 0.2133, + "step": 25260, + "teacher_loss": 0.21058307588100433 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.2902122735977173, + "learning_rate": 9.82826397086402e-07, + "loss": 0.1839, + "step": 25261, + "teacher_loss": 0.17207857966423035 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.16998478770256042, + "learning_rate": 9.820179468711077e-07, + "loss": 0.134, + "step": 25262, + "teacher_loss": 0.13000991940498352 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.6748837232589722, + "learning_rate": 9.81209818046111e-07, + "loss": 0.263, + "step": 25263, + "teacher_loss": 0.2172715663909912 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.3503924608230591, + "learning_rate": 9.804020106299454e-07, + "loss": 0.1903, + "step": 25264, + "teacher_loss": 0.17248988151550293 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.7435210943222046, + "learning_rate": 9.795945246411298e-07, + "loss": 0.3259, + "step": 25265, + "teacher_loss": 0.279512882232666 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.8554823994636536, + "learning_rate": 9.78787360098174e-07, + "loss": 0.2707, + "step": 25266, + "teacher_loss": 0.20569708943367004 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.4237726330757141, + "learning_rate": 9.779805170195865e-07, + "loss": 0.2399, + "step": 25267, + "teacher_loss": 0.21951819956302643 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.5574874877929688, + "learning_rate": 9.771739954238662e-07, + "loss": 0.241, + "step": 25268, + "teacher_loss": 0.2058856338262558 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.5127108693122864, + "learning_rate": 9.76367795329503e-07, + "loss": 0.1674, + "step": 25269, + "teacher_loss": 0.12907275557518005 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.8656055927276611, + "learning_rate": 9.755619167549805e-07, + "loss": 0.319, + "step": 25270, + "teacher_loss": 0.2583044171333313 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.1763477325439453, + "learning_rate": 9.747563597187792e-07, + "loss": 0.1515, + "step": 25271, + "teacher_loss": 0.14871622622013092 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.4178406000137329, + "learning_rate": 9.739511242393606e-07, + "loss": 0.201, + "step": 25272, + "teacher_loss": 0.17692402005195618 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.3171330690383911, + "learning_rate": 9.731462103351919e-07, + "loss": 0.2162, + "step": 25273, + "teacher_loss": 0.2050376534461975 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.449001669883728, + "learning_rate": 9.72341618024728e-07, + "loss": 0.2119, + "step": 25274, + "teacher_loss": 0.1855117380619049 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.3039000928401947, + "learning_rate": 9.715373473264111e-07, + "loss": 0.1805, + "step": 25275, + "teacher_loss": 0.16683441400527954 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.2751407027244568, + "learning_rate": 9.707333982586863e-07, + "loss": 0.2364, + "step": 25276, + "teacher_loss": 0.23205478489398956 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.6350975036621094, + "learning_rate": 9.69929770839979e-07, + "loss": 0.2434, + "step": 25277, + "teacher_loss": 0.19985038042068481 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.5562041997909546, + "learning_rate": 9.691264650887194e-07, + "loss": 0.2277, + "step": 25278, + "teacher_loss": 0.19123660027980804 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.36706995964050293, + "learning_rate": 9.683234810233244e-07, + "loss": 0.2036, + "step": 25279, + "teacher_loss": 0.18545326590538025 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.2762756943702698, + "learning_rate": 9.675208186622008e-07, + "loss": 0.1833, + "step": 25280, + "teacher_loss": 0.17295041680335999 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.48622292280197144, + "learning_rate": 9.667184780237543e-07, + "loss": 0.2228, + "step": 25281, + "teacher_loss": 0.1934972107410431 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.7641993761062622, + "learning_rate": 9.65916459126378e-07, + "loss": 0.3321, + "step": 25282, + "teacher_loss": 0.2840563654899597 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.5566275119781494, + "learning_rate": 9.651147619884626e-07, + "loss": 0.2624, + "step": 25283, + "teacher_loss": 0.22965875267982483 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.6244545578956604, + "learning_rate": 9.643133866283865e-07, + "loss": 0.2302, + "step": 25284, + "teacher_loss": 0.18639524281024933 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.7283577919006348, + "learning_rate": 9.635123330645218e-07, + "loss": 0.2526, + "step": 25285, + "teacher_loss": 0.19968871772289276 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.7025970220565796, + "learning_rate": 9.62711601315237e-07, + "loss": 0.2522, + "step": 25286, + "teacher_loss": 0.2022044062614441 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.3132360279560089, + "learning_rate": 9.61911191398891e-07, + "loss": 0.2037, + "step": 25287, + "teacher_loss": 0.19150398671627045 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.36863166093826294, + "learning_rate": 9.611111033338287e-07, + "loss": 0.1989, + "step": 25288, + "teacher_loss": 0.18000632524490356 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.23424896597862244, + "learning_rate": 9.603113371384025e-07, + "loss": 0.1411, + "step": 25289, + "teacher_loss": 0.13072270154953003 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.763077974319458, + "learning_rate": 9.59511892830941e-07, + "loss": 0.1757, + "step": 25290, + "teacher_loss": 0.11044125258922577 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.6429189443588257, + "learning_rate": 9.587127704297777e-07, + "loss": 0.2867, + "step": 25291, + "teacher_loss": 0.24712969362735748 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.5208631753921509, + "learning_rate": 9.57913969953233e-07, + "loss": 0.2748, + "step": 25292, + "teacher_loss": 0.24749226868152618 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.7731572389602661, + "learning_rate": 9.57115491419619e-07, + "loss": 0.2917, + "step": 25293, + "teacher_loss": 0.2382373809814453 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.48019927740097046, + "learning_rate": 9.563173348472443e-07, + "loss": 0.2631, + "step": 25294, + "teacher_loss": 0.23893752694129944 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.6336334347724915, + "learning_rate": 9.555195002544093e-07, + "loss": 0.1813, + "step": 25295, + "teacher_loss": 0.13100528717041016 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.49347177147865295, + "learning_rate": 9.547219876594043e-07, + "loss": 0.2019, + "step": 25296, + "teacher_loss": 0.1694604456424713 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.7972241640090942, + "learning_rate": 9.539247970805115e-07, + "loss": 0.2011, + "step": 25297, + "teacher_loss": 0.13484182953834534 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.30358487367630005, + "learning_rate": 9.531279285360145e-07, + "loss": 0.2615, + "step": 25298, + "teacher_loss": 0.2568710744380951 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.49980559945106506, + "learning_rate": 9.523313820441804e-07, + "loss": 0.183, + "step": 25299, + "teacher_loss": 0.14778810739517212 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.4036256670951843, + "learning_rate": 9.515351576232645e-07, + "loss": 0.2776, + "step": 25300, + "teacher_loss": 0.2635723948478699 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.42805999517440796, + "learning_rate": 9.50739255291534e-07, + "loss": 0.1989, + "step": 25301, + "teacher_loss": 0.17340236902236938 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.42422056198120117, + "learning_rate": 9.499436750672291e-07, + "loss": 0.1837, + "step": 25302, + "teacher_loss": 0.1570090353488922 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.27697575092315674, + "learning_rate": 9.491484169685888e-07, + "loss": 0.1608, + "step": 25303, + "teacher_loss": 0.14786545932292938 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.361478328704834, + "learning_rate": 9.4835348101385e-07, + "loss": 0.1695, + "step": 25304, + "teacher_loss": 0.14815561473369598 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.44979333877563477, + "learning_rate": 9.475588672212381e-07, + "loss": 0.1626, + "step": 25305, + "teacher_loss": 0.13063758611679077 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.3927963972091675, + "learning_rate": 9.467645756089687e-07, + "loss": 0.2498, + "step": 25306, + "teacher_loss": 0.23389843106269836 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.141736701130867, + "learning_rate": 9.459706061952539e-07, + "loss": 0.1342, + "step": 25307, + "teacher_loss": 0.13338759541511536 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.19363108277320862, + "learning_rate": 9.451769589982989e-07, + "loss": 0.1589, + "step": 25308, + "teacher_loss": 0.1550879180431366 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.4960261583328247, + "learning_rate": 9.443836340362943e-07, + "loss": 0.1793, + "step": 25309, + "teacher_loss": 0.14409592747688293 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.6943809986114502, + "learning_rate": 9.43590631327434e-07, + "loss": 0.2452, + "step": 25310, + "teacher_loss": 0.19528554379940033 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.3955976963043213, + "learning_rate": 9.427979508898982e-07, + "loss": 0.189, + "step": 25311, + "teacher_loss": 0.16609206795692444 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.43434083461761475, + "learning_rate": 9.420055927418592e-07, + "loss": 0.2621, + "step": 25312, + "teacher_loss": 0.24300166964530945 + }, + { + "compression_loss": 0.0, + "epoch": 4.57, + "label_loss": 0.15725907683372498, + "learning_rate": 9.412135569014807e-07, + "loss": 0.1492, + "step": 25313, + "teacher_loss": 0.14826278388500214 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.6298251152038574, + "learning_rate": 9.404218433869283e-07, + "loss": 0.1738, + "step": 25314, + "teacher_loss": 0.12315648049116135 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.16814552247524261, + "learning_rate": 9.396304522163506e-07, + "loss": 0.2239, + "step": 25315, + "teacher_loss": 0.23012548685073853 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.24124358594417572, + "learning_rate": 9.388393834078901e-07, + "loss": 0.197, + "step": 25316, + "teacher_loss": 0.1921166479587555 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.35338687896728516, + "learning_rate": 9.380486369796837e-07, + "loss": 0.2266, + "step": 25317, + "teacher_loss": 0.21254099905490875 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.5619615316390991, + "learning_rate": 9.372582129498652e-07, + "loss": 0.2827, + "step": 25318, + "teacher_loss": 0.25167083740234375 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.31642550230026245, + "learning_rate": 9.364681113365519e-07, + "loss": 0.2147, + "step": 25319, + "teacher_loss": 0.2033807635307312 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.4680515229701996, + "learning_rate": 9.356783321578594e-07, + "loss": 0.1904, + "step": 25320, + "teacher_loss": 0.15952345728874207 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.3304011821746826, + "learning_rate": 9.348888754318979e-07, + "loss": 0.1768, + "step": 25321, + "teacher_loss": 0.15976056456565857 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.4386477470397949, + "learning_rate": 9.340997411767649e-07, + "loss": 0.2116, + "step": 25322, + "teacher_loss": 0.1863684207201004 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.5711950063705444, + "learning_rate": 9.333109294105491e-07, + "loss": 0.2171, + "step": 25323, + "teacher_loss": 0.17780913412570953 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.5650689601898193, + "learning_rate": 9.325224401513427e-07, + "loss": 0.2189, + "step": 25324, + "teacher_loss": 0.18039898574352264 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.36873894929885864, + "learning_rate": 9.317342734172213e-07, + "loss": 0.2346, + "step": 25325, + "teacher_loss": 0.2196635603904724 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.4458620548248291, + "learning_rate": 9.309464292262521e-07, + "loss": 0.2114, + "step": 25326, + "teacher_loss": 0.1853460669517517 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.3334934115409851, + "learning_rate": 9.301589075965005e-07, + "loss": 0.1802, + "step": 25327, + "teacher_loss": 0.16317932307720184 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.5920121669769287, + "learning_rate": 9.293717085460223e-07, + "loss": 0.2296, + "step": 25328, + "teacher_loss": 0.18938563764095306 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.292266309261322, + "learning_rate": 9.285848320928647e-07, + "loss": 0.186, + "step": 25329, + "teacher_loss": 0.1741389036178589 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.19360080361366272, + "learning_rate": 9.277982782550664e-07, + "loss": 0.1393, + "step": 25330, + "teacher_loss": 0.13323110342025757 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.29439884424209595, + "learning_rate": 9.270120470506666e-07, + "loss": 0.1675, + "step": 25331, + "teacher_loss": 0.15343180298805237 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.2746466100215912, + "learning_rate": 9.262261384976855e-07, + "loss": 0.1783, + "step": 25332, + "teacher_loss": 0.1676362156867981 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.6531040668487549, + "learning_rate": 9.254405526141424e-07, + "loss": 0.2108, + "step": 25333, + "teacher_loss": 0.16169428825378418 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.2605685889720917, + "learning_rate": 9.246552894180526e-07, + "loss": 0.1281, + "step": 25334, + "teacher_loss": 0.11335714906454086 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.4879167973995209, + "learning_rate": 9.238703489274153e-07, + "loss": 0.2511, + "step": 25335, + "teacher_loss": 0.22475658357143402 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.3654537796974182, + "learning_rate": 9.230857311602275e-07, + "loss": 0.2087, + "step": 25336, + "teacher_loss": 0.19128525257110596 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.14657816290855408, + "learning_rate": 9.223014361344817e-07, + "loss": 0.1383, + "step": 25337, + "teacher_loss": 0.13733136653900146 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.30210134387016296, + "learning_rate": 9.21517463868155e-07, + "loss": 0.1497, + "step": 25338, + "teacher_loss": 0.13279327750205994 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.440632700920105, + "learning_rate": 9.207338143792266e-07, + "loss": 0.2114, + "step": 25339, + "teacher_loss": 0.18590989708900452 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.5739099383354187, + "learning_rate": 9.199504876856551e-07, + "loss": 0.187, + "step": 25340, + "teacher_loss": 0.14398100972175598 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.41731682419776917, + "learning_rate": 9.191674838054065e-07, + "loss": 0.1994, + "step": 25341, + "teacher_loss": 0.17516663670539856 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.598284900188446, + "learning_rate": 9.183848027564329e-07, + "loss": 0.2804, + "step": 25342, + "teacher_loss": 0.24504326283931732 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.2813422679901123, + "learning_rate": 9.17602444556675e-07, + "loss": 0.2267, + "step": 25343, + "teacher_loss": 0.22058749198913574 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.2508302330970764, + "learning_rate": 9.168204092240734e-07, + "loss": 0.1999, + "step": 25344, + "teacher_loss": 0.19422651827335358 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.5090104341506958, + "learning_rate": 9.160386967765538e-07, + "loss": 0.2434, + "step": 25345, + "teacher_loss": 0.21392107009887695 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.29601308703422546, + "learning_rate": 9.152573072320419e-07, + "loss": 0.1808, + "step": 25346, + "teacher_loss": 0.16801732778549194 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.2985702157020569, + "learning_rate": 9.144762406084534e-07, + "loss": 0.1842, + "step": 25347, + "teacher_loss": 0.17152288556098938 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.5517145395278931, + "learning_rate": 9.136954969236922e-07, + "loss": 0.2096, + "step": 25348, + "teacher_loss": 0.17156654596328735 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.2555495798587799, + "learning_rate": 9.129150761956623e-07, + "loss": 0.2071, + "step": 25349, + "teacher_loss": 0.2017345130443573 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.3936951756477356, + "learning_rate": 9.121349784422528e-07, + "loss": 0.2553, + "step": 25350, + "teacher_loss": 0.23988866806030273 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.43336057662963867, + "learning_rate": 9.113552036813494e-07, + "loss": 0.1636, + "step": 25351, + "teacher_loss": 0.13367554545402527 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.22284942865371704, + "learning_rate": 9.105757519308344e-07, + "loss": 0.1548, + "step": 25352, + "teacher_loss": 0.1472913920879364 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.3418649733066559, + "learning_rate": 9.097966232085736e-07, + "loss": 0.1777, + "step": 25353, + "teacher_loss": 0.1594507098197937 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.9070267081260681, + "learning_rate": 9.090178175324293e-07, + "loss": 0.5334, + "step": 25354, + "teacher_loss": 0.49193209409713745 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.2651379108428955, + "learning_rate": 9.082393349202639e-07, + "loss": 0.1697, + "step": 25355, + "teacher_loss": 0.15907517075538635 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.21438665688037872, + "learning_rate": 9.074611753899181e-07, + "loss": 0.1543, + "step": 25356, + "teacher_loss": 0.14762771129608154 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.33189982175827026, + "learning_rate": 9.066833389592361e-07, + "loss": 0.2081, + "step": 25357, + "teacher_loss": 0.19438773393630981 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.6817593574523926, + "learning_rate": 9.059058256460533e-07, + "loss": 0.3055, + "step": 25358, + "teacher_loss": 0.263639897108078 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.5586743354797363, + "learning_rate": 9.051286354681909e-07, + "loss": 0.2286, + "step": 25359, + "teacher_loss": 0.1919099986553192 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.1953689455986023, + "learning_rate": 9.04351768443471e-07, + "loss": 0.1554, + "step": 25360, + "teacher_loss": 0.15094473958015442 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.2034609019756317, + "learning_rate": 9.035752245897061e-07, + "loss": 0.169, + "step": 25361, + "teacher_loss": 0.16513386368751526 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.5530673265457153, + "learning_rate": 9.027990039246986e-07, + "loss": 0.2197, + "step": 25362, + "teacher_loss": 0.18267199397087097 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.5022286772727966, + "learning_rate": 9.020231064662393e-07, + "loss": 0.1652, + "step": 25363, + "teacher_loss": 0.12779076397418976 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.34399715065956116, + "learning_rate": 9.012475322321273e-07, + "loss": 0.2383, + "step": 25364, + "teacher_loss": 0.2266007959842682 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.24834898114204407, + "learning_rate": 9.004722812401383e-07, + "loss": 0.162, + "step": 25365, + "teacher_loss": 0.1523614227771759 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.37374264001846313, + "learning_rate": 8.996973535080449e-07, + "loss": 0.2024, + "step": 25366, + "teacher_loss": 0.1833898425102234 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.5713999271392822, + "learning_rate": 8.989227490536162e-07, + "loss": 0.2242, + "step": 25367, + "teacher_loss": 0.1855934113264084 + }, + { + "compression_loss": 0.0, + "epoch": 4.58, + "label_loss": 0.7535400986671448, + "learning_rate": 8.981484678946145e-07, + "loss": 0.2671, + "step": 25368, + "teacher_loss": 0.2130289077758789 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.9359387159347534, + "learning_rate": 8.973745100487873e-07, + "loss": 0.2699, + "step": 25369, + "teacher_loss": 0.19590634107589722 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.6134337186813354, + "learning_rate": 8.96600875533879e-07, + "loss": 0.3052, + "step": 25370, + "teacher_loss": 0.27093273401260376 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.2725353538990021, + "learning_rate": 8.958275643676317e-07, + "loss": 0.1799, + "step": 25371, + "teacher_loss": 0.16961748898029327 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.9865927696228027, + "learning_rate": 8.950545765677732e-07, + "loss": 0.3561, + "step": 25372, + "teacher_loss": 0.28599682450294495 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.2887454032897949, + "learning_rate": 8.942819121520174e-07, + "loss": 0.2021, + "step": 25373, + "teacher_loss": 0.19245608150959015 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.3581439256668091, + "learning_rate": 8.93509571138092e-07, + "loss": 0.2798, + "step": 25374, + "teacher_loss": 0.271075963973999 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.592839777469635, + "learning_rate": 8.927375535436993e-07, + "loss": 0.2574, + "step": 25375, + "teacher_loss": 0.2201356589794159 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.28875532746315, + "learning_rate": 8.919658593865354e-07, + "loss": 0.197, + "step": 25376, + "teacher_loss": 0.18677294254302979 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.2306651473045349, + "learning_rate": 8.91194488684296e-07, + "loss": 0.1379, + "step": 25377, + "teacher_loss": 0.12754932045936584 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.3293890953063965, + "learning_rate": 8.904234414546702e-07, + "loss": 0.1687, + "step": 25378, + "teacher_loss": 0.15081578493118286 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.9479986429214478, + "learning_rate": 8.896527177153291e-07, + "loss": 0.3974, + "step": 25379, + "teacher_loss": 0.33618634939193726 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.232370525598526, + "learning_rate": 8.888823174839467e-07, + "loss": 0.144, + "step": 25380, + "teacher_loss": 0.13421492278575897 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.31608468294143677, + "learning_rate": 8.881122407781872e-07, + "loss": 0.1818, + "step": 25381, + "teacher_loss": 0.16687358915805817 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.5733442306518555, + "learning_rate": 8.873424876157016e-07, + "loss": 0.2315, + "step": 25382, + "teacher_loss": 0.19351038336753845 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.565104603767395, + "learning_rate": 8.865730580141424e-07, + "loss": 0.2192, + "step": 25383, + "teacher_loss": 0.18073466420173645 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.4101666808128357, + "learning_rate": 8.858039519911487e-07, + "loss": 0.2256, + "step": 25384, + "teacher_loss": 0.20506048202514648 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.35804229974746704, + "learning_rate": 8.85035169564355e-07, + "loss": 0.1892, + "step": 25385, + "teacher_loss": 0.17042624950408936 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.34658539295196533, + "learning_rate": 8.842667107513819e-07, + "loss": 0.1539, + "step": 25386, + "teacher_loss": 0.13248321413993835 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.16672475636005402, + "learning_rate": 8.834985755698571e-07, + "loss": 0.1227, + "step": 25387, + "teacher_loss": 0.11782631278038025 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.3592228889465332, + "learning_rate": 8.82730764037385e-07, + "loss": 0.1992, + "step": 25388, + "teacher_loss": 0.18137149512767792 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.28662294149398804, + "learning_rate": 8.819632761715696e-07, + "loss": 0.2121, + "step": 25389, + "teacher_loss": 0.2037946730852127 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.31958743929862976, + "learning_rate": 8.811961119900069e-07, + "loss": 0.2149, + "step": 25390, + "teacher_loss": 0.20322871208190918 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.29854342341423035, + "learning_rate": 8.804292715102897e-07, + "loss": 0.2527, + "step": 25391, + "teacher_loss": 0.24761945009231567 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.22436173260211945, + "learning_rate": 8.796627547499953e-07, + "loss": 0.2289, + "step": 25392, + "teacher_loss": 0.2293996512889862 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.6443582773208618, + "learning_rate": 8.78896561726698e-07, + "loss": 0.2623, + "step": 25393, + "teacher_loss": 0.21979567408561707 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.36847367882728577, + "learning_rate": 8.781306924579674e-07, + "loss": 0.283, + "step": 25394, + "teacher_loss": 0.27347368001937866 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.46845197677612305, + "learning_rate": 8.773651469613592e-07, + "loss": 0.1951, + "step": 25395, + "teacher_loss": 0.16477200388908386 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.38287344574928284, + "learning_rate": 8.76599925254426e-07, + "loss": 0.2299, + "step": 25396, + "teacher_loss": 0.2128680944442749 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.6750470399856567, + "learning_rate": 8.758350273547138e-07, + "loss": 0.287, + "step": 25397, + "teacher_loss": 0.24384649097919464 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.5757938623428345, + "learning_rate": 8.75070453279757e-07, + "loss": 0.2686, + "step": 25398, + "teacher_loss": 0.23442384600639343 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.17242567241191864, + "learning_rate": 8.743062030470867e-07, + "loss": 0.2021, + "step": 25399, + "teacher_loss": 0.20536476373672485 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.4470711052417755, + "learning_rate": 8.735422766742235e-07, + "loss": 0.2164, + "step": 25400, + "teacher_loss": 0.19080102443695068 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.3072819113731384, + "learning_rate": 8.72778674178682e-07, + "loss": 0.1986, + "step": 25401, + "teacher_loss": 0.18655487895011902 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.3940732181072235, + "learning_rate": 8.720153955779714e-07, + "loss": 0.1761, + "step": 25402, + "teacher_loss": 0.15186211466789246 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.42614316940307617, + "learning_rate": 8.712524408895878e-07, + "loss": 0.2166, + "step": 25403, + "teacher_loss": 0.19327794015407562 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.2905365824699402, + "learning_rate": 8.704898101310272e-07, + "loss": 0.1825, + "step": 25404, + "teacher_loss": 0.17052319645881653 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 1.0438623428344727, + "learning_rate": 8.697275033197738e-07, + "loss": 0.3083, + "step": 25405, + "teacher_loss": 0.2266244888305664 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.4183996319770813, + "learning_rate": 8.689655204733004e-07, + "loss": 0.2045, + "step": 25406, + "teacher_loss": 0.18072031438350677 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.2403639853000641, + "learning_rate": 8.682038616090848e-07, + "loss": 0.1643, + "step": 25407, + "teacher_loss": 0.15587864816188812 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.5101862549781799, + "learning_rate": 8.674425267445829e-07, + "loss": 0.163, + "step": 25408, + "teacher_loss": 0.12441083043813705 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.5326485633850098, + "learning_rate": 8.666815158972507e-07, + "loss": 0.2274, + "step": 25409, + "teacher_loss": 0.19348041713237762 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.4743402600288391, + "learning_rate": 8.659208290845411e-07, + "loss": 0.1399, + "step": 25410, + "teacher_loss": 0.10278697311878204 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.3294079303741455, + "learning_rate": 8.651604663238882e-07, + "loss": 0.2003, + "step": 25411, + "teacher_loss": 0.18599094450473785 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.37071382999420166, + "learning_rate": 8.6440042763273e-07, + "loss": 0.2079, + "step": 25412, + "teacher_loss": 0.18985579907894135 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.26673635840415955, + "learning_rate": 8.636407130284857e-07, + "loss": 0.163, + "step": 25413, + "teacher_loss": 0.1514323651790619 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.3823966383934021, + "learning_rate": 8.628813225285781e-07, + "loss": 0.1923, + "step": 25414, + "teacher_loss": 0.17113789916038513 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.2793274521827698, + "learning_rate": 8.621222561504183e-07, + "loss": 0.1336, + "step": 25415, + "teacher_loss": 0.11739656329154968 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.46780461072921753, + "learning_rate": 8.613635139114057e-07, + "loss": 0.2251, + "step": 25416, + "teacher_loss": 0.1981765627861023 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.4738069772720337, + "learning_rate": 8.60605095828938e-07, + "loss": 0.1998, + "step": 25417, + "teacher_loss": 0.16933059692382812 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.2628645598888397, + "learning_rate": 8.598470019204047e-07, + "loss": 0.2004, + "step": 25418, + "teacher_loss": 0.19349327683448792 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.5679659247398376, + "learning_rate": 8.590892322031835e-07, + "loss": 0.2242, + "step": 25419, + "teacher_loss": 0.18597060441970825 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.2972109317779541, + "learning_rate": 8.583317866946506e-07, + "loss": 0.1537, + "step": 25420, + "teacher_loss": 0.13780678808689117 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.7761539816856384, + "learning_rate": 8.575746654121719e-07, + "loss": 0.2729, + "step": 25421, + "teacher_loss": 0.21698924899101257 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.29120928049087524, + "learning_rate": 8.568178683731054e-07, + "loss": 0.2246, + "step": 25422, + "teacher_loss": 0.21719610691070557 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.32808685302734375, + "learning_rate": 8.560613955947971e-07, + "loss": 0.2747, + "step": 25423, + "teacher_loss": 0.2687840163707733 + }, + { + "compression_loss": 0.0, + "epoch": 4.59, + "label_loss": 0.544015109539032, + "learning_rate": 8.553052470945999e-07, + "loss": 0.2382, + "step": 25424, + "teacher_loss": 0.20420145988464355 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.7357940673828125, + "learning_rate": 8.545494228898448e-07, + "loss": 0.198, + "step": 25425, + "teacher_loss": 0.13827836513519287 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.34699589014053345, + "learning_rate": 8.537939229978597e-07, + "loss": 0.1623, + "step": 25426, + "teacher_loss": 0.14179080724716187 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.4853142499923706, + "learning_rate": 8.530387474359658e-07, + "loss": 0.1806, + "step": 25427, + "teacher_loss": 0.14677976071834564 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.40237534046173096, + "learning_rate": 8.522838962214824e-07, + "loss": 0.1852, + "step": 25428, + "teacher_loss": 0.1610666811466217 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.43966007232666016, + "learning_rate": 8.515293693717091e-07, + "loss": 0.2458, + "step": 25429, + "teacher_loss": 0.22429212927818298 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.31295913457870483, + "learning_rate": 8.507751669039488e-07, + "loss": 0.2676, + "step": 25430, + "teacher_loss": 0.26260995864868164 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.5544017553329468, + "learning_rate": 8.500212888354958e-07, + "loss": 0.2419, + "step": 25431, + "teacher_loss": 0.20718058943748474 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.36435991525650024, + "learning_rate": 8.492677351836264e-07, + "loss": 0.1754, + "step": 25432, + "teacher_loss": 0.15440791845321655 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.24574732780456543, + "learning_rate": 8.485145059656219e-07, + "loss": 0.1964, + "step": 25433, + "teacher_loss": 0.19092130661010742 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.5613336563110352, + "learning_rate": 8.477616011987549e-07, + "loss": 0.2638, + "step": 25434, + "teacher_loss": 0.23075279593467712 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.40825074911117554, + "learning_rate": 8.470090209002835e-07, + "loss": 0.2068, + "step": 25435, + "teacher_loss": 0.18436145782470703 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.6100513339042664, + "learning_rate": 8.46256765087457e-07, + "loss": 0.264, + "step": 25436, + "teacher_loss": 0.22552308440208435 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.30441269278526306, + "learning_rate": 8.455048337775334e-07, + "loss": 0.2179, + "step": 25437, + "teacher_loss": 0.20833390951156616 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.16952374577522278, + "learning_rate": 8.447532269877455e-07, + "loss": 0.1545, + "step": 25438, + "teacher_loss": 0.15283679962158203 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.2949760854244232, + "learning_rate": 8.440019447353248e-07, + "loss": 0.2235, + "step": 25439, + "teacher_loss": 0.2156073898077011 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.6164042353630066, + "learning_rate": 8.432509870374971e-07, + "loss": 0.381, + "step": 25440, + "teacher_loss": 0.35486334562301636 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.4431574046611786, + "learning_rate": 8.42500353911484e-07, + "loss": 0.1823, + "step": 25441, + "teacher_loss": 0.1533556878566742 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.3262184262275696, + "learning_rate": 8.417500453744864e-07, + "loss": 0.1657, + "step": 25442, + "teacher_loss": 0.14784082770347595 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.3469354510307312, + "learning_rate": 8.410000614437141e-07, + "loss": 0.1779, + "step": 25443, + "teacher_loss": 0.1590634137392044 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.4059832692146301, + "learning_rate": 8.402504021363599e-07, + "loss": 0.2614, + "step": 25444, + "teacher_loss": 0.24531367421150208 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.15559996664524078, + "learning_rate": 8.395010674696102e-07, + "loss": 0.1235, + "step": 25445, + "teacher_loss": 0.11989938467741013 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.26305776834487915, + "learning_rate": 8.387520574606444e-07, + "loss": 0.2509, + "step": 25446, + "teacher_loss": 0.2495480477809906 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.7707770466804504, + "learning_rate": 8.380033721266405e-07, + "loss": 0.2783, + "step": 25447, + "teacher_loss": 0.22362074255943298 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 1.3704841136932373, + "learning_rate": 8.372550114847566e-07, + "loss": 0.4846, + "step": 25448, + "teacher_loss": 0.38620078563690186 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.47335827350616455, + "learning_rate": 8.365069755521537e-07, + "loss": 0.2122, + "step": 25449, + "teacher_loss": 0.18320009112358093 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.6355802416801453, + "learning_rate": 8.3575926434598e-07, + "loss": 0.2603, + "step": 25450, + "teacher_loss": 0.21854686737060547 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.5063444375991821, + "learning_rate": 8.350118778833815e-07, + "loss": 0.193, + "step": 25451, + "teacher_loss": 0.15817567706108093 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.3749358355998993, + "learning_rate": 8.342648161814898e-07, + "loss": 0.2237, + "step": 25452, + "teacher_loss": 0.20687636733055115 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.4512612819671631, + "learning_rate": 8.33518079257436e-07, + "loss": 0.2675, + "step": 25453, + "teacher_loss": 0.247114896774292 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.5963268280029297, + "learning_rate": 8.327716671283414e-07, + "loss": 0.2191, + "step": 25454, + "teacher_loss": 0.17713937163352966 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.3764534294605255, + "learning_rate": 8.320255798113141e-07, + "loss": 0.2034, + "step": 25455, + "teacher_loss": 0.1842138022184372 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.6379756331443787, + "learning_rate": 8.31279817323462e-07, + "loss": 0.2181, + "step": 25456, + "teacher_loss": 0.17144066095352173 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.2761960029602051, + "learning_rate": 8.305343796818865e-07, + "loss": 0.152, + "step": 25457, + "teacher_loss": 0.1382140964269638 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.23853370547294617, + "learning_rate": 8.297892669036721e-07, + "loss": 0.1809, + "step": 25458, + "teacher_loss": 0.17454522848129272 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.30613529682159424, + "learning_rate": 8.290444790059071e-07, + "loss": 0.1841, + "step": 25459, + "teacher_loss": 0.17050202190876007 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.42957621812820435, + "learning_rate": 8.283000160056658e-07, + "loss": 0.2089, + "step": 25460, + "teacher_loss": 0.18433448672294617 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.48454004526138306, + "learning_rate": 8.275558779200148e-07, + "loss": 0.1935, + "step": 25461, + "teacher_loss": 0.16119526326656342 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.2686091661453247, + "learning_rate": 8.268120647660188e-07, + "loss": 0.1596, + "step": 25462, + "teacher_loss": 0.14749515056610107 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.47633621096611023, + "learning_rate": 8.260685765607273e-07, + "loss": 0.1991, + "step": 25463, + "teacher_loss": 0.16829344630241394 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.7268596887588501, + "learning_rate": 8.253254133211869e-07, + "loss": 0.2901, + "step": 25464, + "teacher_loss": 0.2415502965450287 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.6045467853546143, + "learning_rate": 8.245825750644387e-07, + "loss": 0.2015, + "step": 25465, + "teacher_loss": 0.15673410892486572 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.3046342730522156, + "learning_rate": 8.238400618075109e-07, + "loss": 0.1791, + "step": 25466, + "teacher_loss": 0.1651729792356491 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.5204113125801086, + "learning_rate": 8.230978735674266e-07, + "loss": 0.2551, + "step": 25467, + "teacher_loss": 0.22562071681022644 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.9977778792381287, + "learning_rate": 8.223560103612071e-07, + "loss": 0.2611, + "step": 25468, + "teacher_loss": 0.17923936247825623 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.14617115259170532, + "learning_rate": 8.216144722058539e-07, + "loss": 0.1661, + "step": 25469, + "teacher_loss": 0.16832619905471802 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.5440880060195923, + "learning_rate": 8.208732591183749e-07, + "loss": 0.2379, + "step": 25470, + "teacher_loss": 0.2038961946964264 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.8916402459144592, + "learning_rate": 8.201323711157583e-07, + "loss": 0.3007, + "step": 25471, + "teacher_loss": 0.2350865602493286 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.4210309386253357, + "learning_rate": 8.193918082149954e-07, + "loss": 0.2552, + "step": 25472, + "teacher_loss": 0.23679476976394653 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.3036521077156067, + "learning_rate": 8.18651570433061e-07, + "loss": 0.1956, + "step": 25473, + "teacher_loss": 0.1836080402135849 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.5067384243011475, + "learning_rate": 8.179116577869283e-07, + "loss": 0.2125, + "step": 25474, + "teacher_loss": 0.17984187602996826 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.5642558336257935, + "learning_rate": 8.17172070293562e-07, + "loss": 0.2493, + "step": 25475, + "teacher_loss": 0.2143518030643463 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.2145422250032425, + "learning_rate": 8.164328079699168e-07, + "loss": 0.1587, + "step": 25476, + "teacher_loss": 0.15252180397510529 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.4642295837402344, + "learning_rate": 8.156938708329425e-07, + "loss": 0.2059, + "step": 25477, + "teacher_loss": 0.1772378534078598 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.8076434135437012, + "learning_rate": 8.149552588995824e-07, + "loss": 0.2553, + "step": 25478, + "teacher_loss": 0.1938788741827011 + }, + { + "compression_loss": 0.0, + "epoch": 4.6, + "label_loss": 0.3025141656398773, + "learning_rate": 8.142169721867676e-07, + "loss": 0.2193, + "step": 25479, + "teacher_loss": 0.21002745628356934 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.3230145573616028, + "learning_rate": 8.134790107114265e-07, + "loss": 0.204, + "step": 25480, + "teacher_loss": 0.19083133339881897 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.6536225080490112, + "learning_rate": 8.127413744904805e-07, + "loss": 0.2325, + "step": 25481, + "teacher_loss": 0.18567653000354767 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.7507622241973877, + "learning_rate": 8.120040635408377e-07, + "loss": 0.2449, + "step": 25482, + "teacher_loss": 0.1886594593524933 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.854945719242096, + "learning_rate": 8.112670778794029e-07, + "loss": 0.2485, + "step": 25483, + "teacher_loss": 0.18110023438930511 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.2710120975971222, + "learning_rate": 8.10530417523076e-07, + "loss": 0.1533, + "step": 25484, + "teacher_loss": 0.14023497700691223 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.6301982402801514, + "learning_rate": 8.097940824887451e-07, + "loss": 0.2576, + "step": 25485, + "teacher_loss": 0.21624766290187836 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.11365041881799698, + "learning_rate": 8.090580727932867e-07, + "loss": 0.1281, + "step": 25486, + "teacher_loss": 0.12970973551273346 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.525229811668396, + "learning_rate": 8.08322388453584e-07, + "loss": 0.1991, + "step": 25487, + "teacher_loss": 0.16285625100135803 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.3677288293838501, + "learning_rate": 8.075870294865018e-07, + "loss": 0.2031, + "step": 25488, + "teacher_loss": 0.18481279909610748 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.3909282088279724, + "learning_rate": 8.068519959088932e-07, + "loss": 0.2642, + "step": 25489, + "teacher_loss": 0.2501263916492462 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.3609829843044281, + "learning_rate": 8.061172877376166e-07, + "loss": 0.1573, + "step": 25490, + "teacher_loss": 0.13462717831134796 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.49601975083351135, + "learning_rate": 8.053829049895184e-07, + "loss": 0.2005, + "step": 25491, + "teacher_loss": 0.1676623672246933 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.3902442455291748, + "learning_rate": 8.046488476814284e-07, + "loss": 0.236, + "step": 25492, + "teacher_loss": 0.21890190243721008 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 1.0367602109909058, + "learning_rate": 8.039151158301816e-07, + "loss": 0.3502, + "step": 25493, + "teacher_loss": 0.27386409044265747 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.39509326219558716, + "learning_rate": 8.031817094526012e-07, + "loss": 0.1843, + "step": 25494, + "teacher_loss": 0.16091583669185638 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.6413173675537109, + "learning_rate": 8.024486285654986e-07, + "loss": 0.2566, + "step": 25495, + "teacher_loss": 0.2138597071170807 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.9523268342018127, + "learning_rate": 8.017158731856788e-07, + "loss": 0.301, + "step": 25496, + "teacher_loss": 0.22865082323551178 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 1.1157842874526978, + "learning_rate": 8.009834433299502e-07, + "loss": 0.2772, + "step": 25497, + "teacher_loss": 0.18404605984687805 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.28363293409347534, + "learning_rate": 8.002513390151007e-07, + "loss": 0.2075, + "step": 25498, + "teacher_loss": 0.19903671741485596 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.6049840450286865, + "learning_rate": 7.995195602579119e-07, + "loss": 0.1992, + "step": 25499, + "teacher_loss": 0.15406033396720886 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.37473469972610474, + "learning_rate": 7.98788107075164e-07, + "loss": 0.2156, + "step": 25500, + "teacher_loss": 0.19788554310798645 + }, + { + "epoch": 4.61, + "eval_exact_match": 80.3027436140019, + "eval_f1": 87.60761573294316, + "step": 25500 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.5121930837631226, + "learning_rate": 7.980569794836318e-07, + "loss": 0.2816, + "step": 25501, + "teacher_loss": 0.25595369935035706 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.6750133037567139, + "learning_rate": 7.973261775000684e-07, + "loss": 0.2871, + "step": 25502, + "teacher_loss": 0.24401862919330597 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.749721348285675, + "learning_rate": 7.965957011412355e-07, + "loss": 0.249, + "step": 25503, + "teacher_loss": 0.19331002235412598 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.2263377606868744, + "learning_rate": 7.958655504238815e-07, + "loss": 0.1633, + "step": 25504, + "teacher_loss": 0.15634959936141968 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.20941588282585144, + "learning_rate": 7.951357253647412e-07, + "loss": 0.1195, + "step": 25505, + "teacher_loss": 0.10953295230865479 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.41801372170448303, + "learning_rate": 7.944062259805513e-07, + "loss": 0.214, + "step": 25506, + "teacher_loss": 0.19129467010498047 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.306363046169281, + "learning_rate": 7.936770522880365e-07, + "loss": 0.1807, + "step": 25507, + "teacher_loss": 0.16670529544353485 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.24499621987342834, + "learning_rate": 7.929482043039137e-07, + "loss": 0.1745, + "step": 25508, + "teacher_loss": 0.16666947305202484 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.295045405626297, + "learning_rate": 7.922196820448929e-07, + "loss": 0.1857, + "step": 25509, + "teacher_loss": 0.17360520362854004 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.39220762252807617, + "learning_rate": 7.914914855276806e-07, + "loss": 0.1692, + "step": 25510, + "teacher_loss": 0.14439141750335693 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.25716090202331543, + "learning_rate": 7.907636147689684e-07, + "loss": 0.1795, + "step": 25511, + "teacher_loss": 0.17090007662773132 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.4754082262516022, + "learning_rate": 7.900360697854431e-07, + "loss": 0.2108, + "step": 25512, + "teacher_loss": 0.18141329288482666 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.24112233519554138, + "learning_rate": 7.893088505937862e-07, + "loss": 0.1868, + "step": 25513, + "teacher_loss": 0.18073318898677826 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.3661770224571228, + "learning_rate": 7.885819572106745e-07, + "loss": 0.2132, + "step": 25514, + "teacher_loss": 0.19624172151088715 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.7823975682258606, + "learning_rate": 7.878553896527696e-07, + "loss": 0.2745, + "step": 25515, + "teacher_loss": 0.2180669605731964 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.40380364656448364, + "learning_rate": 7.871291479367281e-07, + "loss": 0.1888, + "step": 25516, + "teacher_loss": 0.164909228682518 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 1.0214440822601318, + "learning_rate": 7.86403232079207e-07, + "loss": 0.3167, + "step": 25517, + "teacher_loss": 0.23839518427848816 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.210858553647995, + "learning_rate": 7.856776420968409e-07, + "loss": 0.1678, + "step": 25518, + "teacher_loss": 0.1630110740661621 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.24769769608974457, + "learning_rate": 7.849523780062717e-07, + "loss": 0.202, + "step": 25519, + "teacher_loss": 0.19696620106697083 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.4556310176849365, + "learning_rate": 7.842274398241262e-07, + "loss": 0.2185, + "step": 25520, + "teacher_loss": 0.19211503863334656 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.4361347556114197, + "learning_rate": 7.835028275670225e-07, + "loss": 0.2345, + "step": 25521, + "teacher_loss": 0.212068572640419 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.3823814392089844, + "learning_rate": 7.827785412515792e-07, + "loss": 0.1816, + "step": 25522, + "teacher_loss": 0.15929976105690002 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.7110509872436523, + "learning_rate": 7.820545808943947e-07, + "loss": 0.4805, + "step": 25523, + "teacher_loss": 0.4548507630825043 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 1.0242714881896973, + "learning_rate": 7.813309465120721e-07, + "loss": 0.2745, + "step": 25524, + "teacher_loss": 0.1912326216697693 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.424579381942749, + "learning_rate": 7.806076381212018e-07, + "loss": 0.1867, + "step": 25525, + "teacher_loss": 0.16026383638381958 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.10036291182041168, + "learning_rate": 7.798846557383655e-07, + "loss": 0.1271, + "step": 25526, + "teacher_loss": 0.13009056448936462 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.3729804754257202, + "learning_rate": 7.791619993801413e-07, + "loss": 0.1984, + "step": 25527, + "teacher_loss": 0.17896929383277893 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.5194668769836426, + "learning_rate": 7.784396690630963e-07, + "loss": 0.2131, + "step": 25528, + "teacher_loss": 0.17905762791633606 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.6632795333862305, + "learning_rate": 7.777176648037887e-07, + "loss": 0.2371, + "step": 25529, + "teacher_loss": 0.18977192044258118 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.18019890785217285, + "learning_rate": 7.769959866187787e-07, + "loss": 0.1243, + "step": 25530, + "teacher_loss": 0.11805213987827301 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.3497493267059326, + "learning_rate": 7.762746345246046e-07, + "loss": 0.2343, + "step": 25531, + "teacher_loss": 0.22145143151283264 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.1685025691986084, + "learning_rate": 7.755536085378067e-07, + "loss": 0.1656, + "step": 25532, + "teacher_loss": 0.16525262594223022 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.3634466528892517, + "learning_rate": 7.748329086749217e-07, + "loss": 0.1728, + "step": 25533, + "teacher_loss": 0.151571124792099 + }, + { + "compression_loss": 0.0, + "epoch": 4.61, + "label_loss": 0.7089089155197144, + "learning_rate": 7.741125349524664e-07, + "loss": 0.2548, + "step": 25534, + "teacher_loss": 0.2043761909008026 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.712272047996521, + "learning_rate": 7.733924873869608e-07, + "loss": 0.2565, + "step": 25535, + "teacher_loss": 0.2058541178703308 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.19297215342521667, + "learning_rate": 7.726727659949101e-07, + "loss": 0.2351, + "step": 25536, + "teacher_loss": 0.23980659246444702 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.9660158157348633, + "learning_rate": 7.719533707928178e-07, + "loss": 0.3255, + "step": 25537, + "teacher_loss": 0.2543077766895294 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.342582106590271, + "learning_rate": 7.712343017971774e-07, + "loss": 0.2185, + "step": 25538, + "teacher_loss": 0.20470136404037476 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.43560075759887695, + "learning_rate": 7.705155590244739e-07, + "loss": 0.1936, + "step": 25539, + "teacher_loss": 0.1667361706495285 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.5639803409576416, + "learning_rate": 7.697971424911843e-07, + "loss": 0.3377, + "step": 25540, + "teacher_loss": 0.31259313225746155 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.27974072098731995, + "learning_rate": 7.690790522137853e-07, + "loss": 0.1763, + "step": 25541, + "teacher_loss": 0.16482123732566833 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.1829862892627716, + "learning_rate": 7.683612882087354e-07, + "loss": 0.1623, + "step": 25542, + "teacher_loss": 0.1599699854850769 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.31607168912887573, + "learning_rate": 7.676438504924915e-07, + "loss": 0.1649, + "step": 25543, + "teacher_loss": 0.14807146787643433 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.21336233615875244, + "learning_rate": 7.669267390815072e-07, + "loss": 0.1692, + "step": 25544, + "teacher_loss": 0.1643315851688385 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.28381022810935974, + "learning_rate": 7.662099539922174e-07, + "loss": 0.2104, + "step": 25545, + "teacher_loss": 0.20228806138038635 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.44479912519454956, + "learning_rate": 7.654934952410559e-07, + "loss": 0.2231, + "step": 25546, + "teacher_loss": 0.19846567511558533 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.6885885000228882, + "learning_rate": 7.647773628444543e-07, + "loss": 0.3207, + "step": 25547, + "teacher_loss": 0.2797755002975464 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.42295897006988525, + "learning_rate": 7.640615568188297e-07, + "loss": 0.1948, + "step": 25548, + "teacher_loss": 0.16939936578273773 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.45511189103126526, + "learning_rate": 7.633460771805872e-07, + "loss": 0.2829, + "step": 25549, + "teacher_loss": 0.26379692554473877 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.4520115256309509, + "learning_rate": 7.626309239461387e-07, + "loss": 0.1953, + "step": 25550, + "teacher_loss": 0.16677230596542358 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.3982808589935303, + "learning_rate": 7.619160971318779e-07, + "loss": 0.1543, + "step": 25551, + "teacher_loss": 0.12723052501678467 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.45673221349716187, + "learning_rate": 7.612015967541913e-07, + "loss": 0.1848, + "step": 25552, + "teacher_loss": 0.15456461906433105 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.5517880916595459, + "learning_rate": 7.604874228294611e-07, + "loss": 0.2521, + "step": 25553, + "teacher_loss": 0.21880221366882324 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.29940539598464966, + "learning_rate": 7.597735753740659e-07, + "loss": 0.1522, + "step": 25554, + "teacher_loss": 0.13589292764663696 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.7003461122512817, + "learning_rate": 7.590600544043641e-07, + "loss": 0.2888, + "step": 25555, + "teacher_loss": 0.24308457970619202 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.7185056209564209, + "learning_rate": 7.583468599367194e-07, + "loss": 0.2463, + "step": 25556, + "teacher_loss": 0.19388815760612488 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.5118451714515686, + "learning_rate": 7.576339919874853e-07, + "loss": 0.1889, + "step": 25557, + "teacher_loss": 0.15300381183624268 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.5901716947555542, + "learning_rate": 7.569214505730021e-07, + "loss": 0.2599, + "step": 25558, + "teacher_loss": 0.22320988774299622 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.23936668038368225, + "learning_rate": 7.562092357096034e-07, + "loss": 0.1534, + "step": 25559, + "teacher_loss": 0.14387652277946472 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.35094988346099854, + "learning_rate": 7.554973474136245e-07, + "loss": 0.2182, + "step": 25560, + "teacher_loss": 0.20348691940307617 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.3597275912761688, + "learning_rate": 7.547857857013857e-07, + "loss": 0.2287, + "step": 25561, + "teacher_loss": 0.21412979066371918 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.2918512225151062, + "learning_rate": 7.540745505891972e-07, + "loss": 0.2444, + "step": 25562, + "teacher_loss": 0.2391512393951416 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.21225619316101074, + "learning_rate": 7.533636420933676e-07, + "loss": 0.1826, + "step": 25563, + "teacher_loss": 0.17935171723365784 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.33985981345176697, + "learning_rate": 7.52653060230199e-07, + "loss": 0.2089, + "step": 25564, + "teacher_loss": 0.19436706602573395 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.3982829749584198, + "learning_rate": 7.519428050159765e-07, + "loss": 0.2305, + "step": 25565, + "teacher_loss": 0.21181833744049072 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.38846033811569214, + "learning_rate": 7.51232876466989e-07, + "loss": 0.189, + "step": 25566, + "teacher_loss": 0.1667933613061905 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.41075244545936584, + "learning_rate": 7.505232745995116e-07, + "loss": 0.1615, + "step": 25567, + "teacher_loss": 0.133827343583107 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.2745397984981537, + "learning_rate": 7.498139994298131e-07, + "loss": 0.1802, + "step": 25568, + "teacher_loss": 0.1696842908859253 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.30793362855911255, + "learning_rate": 7.491050509741554e-07, + "loss": 0.2666, + "step": 25569, + "teacher_loss": 0.2619755268096924 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.25527530908584595, + "learning_rate": 7.483964292487938e-07, + "loss": 0.1715, + "step": 25570, + "teacher_loss": 0.16220590472221375 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.5103049278259277, + "learning_rate": 7.476881342699721e-07, + "loss": 0.2718, + "step": 25571, + "teacher_loss": 0.24528378248214722 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.2960830628871918, + "learning_rate": 7.469801660539321e-07, + "loss": 0.1921, + "step": 25572, + "teacher_loss": 0.18057605624198914 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.3303460478782654, + "learning_rate": 7.462725246169028e-07, + "loss": 0.175, + "step": 25573, + "teacher_loss": 0.1577080339193344 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.3989889323711395, + "learning_rate": 7.455652099751109e-07, + "loss": 0.2498, + "step": 25574, + "teacher_loss": 0.23317056894302368 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.3871164321899414, + "learning_rate": 7.448582221447702e-07, + "loss": 0.218, + "step": 25575, + "teacher_loss": 0.1991792768239975 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.45671385526657104, + "learning_rate": 7.441515611420913e-07, + "loss": 0.1974, + "step": 25576, + "teacher_loss": 0.1685422658920288 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.5563538670539856, + "learning_rate": 7.434452269832776e-07, + "loss": 0.189, + "step": 25577, + "teacher_loss": 0.14819224178791046 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.2985634505748749, + "learning_rate": 7.427392196845195e-07, + "loss": 0.1743, + "step": 25578, + "teacher_loss": 0.16052848100662231 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.9515265226364136, + "learning_rate": 7.420335392620059e-07, + "loss": 0.2893, + "step": 25579, + "teacher_loss": 0.21571172773838043 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.3071005642414093, + "learning_rate": 7.413281857319171e-07, + "loss": 0.189, + "step": 25580, + "teacher_loss": 0.17588868737220764 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.4065256416797638, + "learning_rate": 7.406231591104218e-07, + "loss": 0.1607, + "step": 25581, + "teacher_loss": 0.13335296511650085 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.35869109630584717, + "learning_rate": 7.399184594136854e-07, + "loss": 0.2627, + "step": 25582, + "teacher_loss": 0.2520214915275574 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.2841533124446869, + "learning_rate": 7.392140866578667e-07, + "loss": 0.2265, + "step": 25583, + "teacher_loss": 0.22011739015579224 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.17395257949829102, + "learning_rate": 7.385100408591111e-07, + "loss": 0.1695, + "step": 25584, + "teacher_loss": 0.16905122995376587 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.4164174795150757, + "learning_rate": 7.37806322033564e-07, + "loss": 0.2321, + "step": 25585, + "teacher_loss": 0.21157222986221313 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.21585646271705627, + "learning_rate": 7.371029301973559e-07, + "loss": 0.1595, + "step": 25586, + "teacher_loss": 0.15321585536003113 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.5045528411865234, + "learning_rate": 7.363998653666154e-07, + "loss": 0.2399, + "step": 25587, + "teacher_loss": 0.21044325828552246 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.500564694404602, + "learning_rate": 7.356971275574632e-07, + "loss": 0.2539, + "step": 25588, + "teacher_loss": 0.22650116682052612 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.5118403434753418, + "learning_rate": 7.349947167860077e-07, + "loss": 0.2323, + "step": 25589, + "teacher_loss": 0.20127728581428528 + }, + { + "compression_loss": 0.0, + "epoch": 4.62, + "label_loss": 0.3456307351589203, + "learning_rate": 7.342926330683531e-07, + "loss": 0.1836, + "step": 25590, + "teacher_loss": 0.16555237770080566 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.5445907115936279, + "learning_rate": 7.335908764206012e-07, + "loss": 0.2796, + "step": 25591, + "teacher_loss": 0.25020328164100647 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.34492504596710205, + "learning_rate": 7.328894468588343e-07, + "loss": 0.1381, + "step": 25592, + "teacher_loss": 0.11506979912519455 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.5372344255447388, + "learning_rate": 7.321883443991412e-07, + "loss": 0.1994, + "step": 25593, + "teacher_loss": 0.16191115975379944 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.30108100175857544, + "learning_rate": 7.314875690575889e-07, + "loss": 0.2287, + "step": 25594, + "teacher_loss": 0.22062557935714722 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.5125848650932312, + "learning_rate": 7.307871208502498e-07, + "loss": 0.2031, + "step": 25595, + "teacher_loss": 0.16869372129440308 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.38981691002845764, + "learning_rate": 7.300869997931792e-07, + "loss": 0.2319, + "step": 25596, + "teacher_loss": 0.2143700271844864 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.6277039051055908, + "learning_rate": 7.293872059024292e-07, + "loss": 0.2517, + "step": 25597, + "teacher_loss": 0.209959015250206 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.4528503715991974, + "learning_rate": 7.286877391940472e-07, + "loss": 0.3272, + "step": 25598, + "teacher_loss": 0.3132913112640381 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.19746926426887512, + "learning_rate": 7.279885996840669e-07, + "loss": 0.1739, + "step": 25599, + "teacher_loss": 0.17132464051246643 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.1433059573173523, + "learning_rate": 7.27289787388517e-07, + "loss": 0.1802, + "step": 25600, + "teacher_loss": 0.1842639148235321 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.21477825939655304, + "learning_rate": 7.265913023234233e-07, + "loss": 0.1647, + "step": 25601, + "teacher_loss": 0.15916195511817932 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.18871772289276123, + "learning_rate": 7.258931445047945e-07, + "loss": 0.1656, + "step": 25602, + "teacher_loss": 0.16308562457561493 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.2316170036792755, + "learning_rate": 7.251953139486394e-07, + "loss": 0.1533, + "step": 25603, + "teacher_loss": 0.14454349875450134 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.43806397914886475, + "learning_rate": 7.244978106709621e-07, + "loss": 0.2314, + "step": 25604, + "teacher_loss": 0.20841875672340393 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.3230438828468323, + "learning_rate": 7.238006346877463e-07, + "loss": 0.1923, + "step": 25605, + "teacher_loss": 0.17780175805091858 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.15291255712509155, + "learning_rate": 7.231037860149791e-07, + "loss": 0.1477, + "step": 25606, + "teacher_loss": 0.14714054763317108 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.29740434885025024, + "learning_rate": 7.224072646686397e-07, + "loss": 0.1349, + "step": 25607, + "teacher_loss": 0.11679884046316147 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.4729596972465515, + "learning_rate": 7.217110706646968e-07, + "loss": 0.2186, + "step": 25608, + "teacher_loss": 0.19033998250961304 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.5154913663864136, + "learning_rate": 7.210152040191042e-07, + "loss": 0.1992, + "step": 25609, + "teacher_loss": 0.1640281081199646 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.5146692991256714, + "learning_rate": 7.203196647478277e-07, + "loss": 0.1948, + "step": 25610, + "teacher_loss": 0.15926247835159302 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.6050976514816284, + "learning_rate": 7.196244528668094e-07, + "loss": 0.2294, + "step": 25611, + "teacher_loss": 0.1876990795135498 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.25553473830223083, + "learning_rate": 7.189295683919849e-07, + "loss": 0.2326, + "step": 25612, + "teacher_loss": 0.23007425665855408 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.43125730752944946, + "learning_rate": 7.182350113392883e-07, + "loss": 0.2004, + "step": 25613, + "teacher_loss": 0.17477869987487793 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.5232822299003601, + "learning_rate": 7.175407817246466e-07, + "loss": 0.2006, + "step": 25614, + "teacher_loss": 0.16470903158187866 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.31230634450912476, + "learning_rate": 7.168468795639704e-07, + "loss": 0.235, + "step": 25615, + "teacher_loss": 0.22640183568000793 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.37346571683883667, + "learning_rate": 7.161533048731722e-07, + "loss": 0.183, + "step": 25616, + "teacher_loss": 0.161783367395401 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.9874559044837952, + "learning_rate": 7.154600576681575e-07, + "loss": 0.4559, + "step": 25617, + "teacher_loss": 0.396862655878067 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.2866028845310211, + "learning_rate": 7.147671379648152e-07, + "loss": 0.202, + "step": 25618, + "teacher_loss": 0.19260576367378235 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.31290319561958313, + "learning_rate": 7.140745457790276e-07, + "loss": 0.1381, + "step": 25619, + "teacher_loss": 0.11868340522050858 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.8316222429275513, + "learning_rate": 7.133822811266854e-07, + "loss": 0.3716, + "step": 25620, + "teacher_loss": 0.32044750452041626 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.585129976272583, + "learning_rate": 7.126903440236526e-07, + "loss": 0.2385, + "step": 25621, + "teacher_loss": 0.20002171397209167 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.39797505736351013, + "learning_rate": 7.119987344857948e-07, + "loss": 0.2714, + "step": 25622, + "teacher_loss": 0.25735002756118774 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.2894400954246521, + "learning_rate": 7.113074525289659e-07, + "loss": 0.2532, + "step": 25623, + "teacher_loss": 0.2491888403892517 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.9540418386459351, + "learning_rate": 7.1061649816902e-07, + "loss": 0.3049, + "step": 25624, + "teacher_loss": 0.23282021284103394 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.3704535663127899, + "learning_rate": 7.099258714217944e-07, + "loss": 0.2433, + "step": 25625, + "teacher_loss": 0.22912028431892395 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.6917981505393982, + "learning_rate": 7.092355723031246e-07, + "loss": 0.5024, + "step": 25626, + "teacher_loss": 0.4813328683376312 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.35530149936676025, + "learning_rate": 7.0854560082884e-07, + "loss": 0.2394, + "step": 25627, + "teacher_loss": 0.22655722498893738 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.7794751524925232, + "learning_rate": 7.078559570147542e-07, + "loss": 0.3024, + "step": 25628, + "teacher_loss": 0.24943819642066956 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.4597625732421875, + "learning_rate": 7.071666408766814e-07, + "loss": 0.2521, + "step": 25629, + "teacher_loss": 0.22905993461608887 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.21277078986167908, + "learning_rate": 7.064776524304256e-07, + "loss": 0.1723, + "step": 25630, + "teacher_loss": 0.1677531898021698 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.40412086248397827, + "learning_rate": 7.057889916917826e-07, + "loss": 0.2839, + "step": 25631, + "teacher_loss": 0.27058184146881104 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.588576078414917, + "learning_rate": 7.05100658676543e-07, + "loss": 0.2404, + "step": 25632, + "teacher_loss": 0.20175254344940186 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.4729962944984436, + "learning_rate": 7.044126534004875e-07, + "loss": 0.2411, + "step": 25633, + "teacher_loss": 0.21537786722183228 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.2468685805797577, + "learning_rate": 7.037249758793884e-07, + "loss": 0.1605, + "step": 25634, + "teacher_loss": 0.15085354447364807 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.4662829339504242, + "learning_rate": 7.030376261290134e-07, + "loss": 0.2232, + "step": 25635, + "teacher_loss": 0.19621434807777405 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.3375188410282135, + "learning_rate": 7.023506041651196e-07, + "loss": 0.1931, + "step": 25636, + "teacher_loss": 0.17704719305038452 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.3260479271411896, + "learning_rate": 7.016639100034627e-07, + "loss": 0.1929, + "step": 25637, + "teacher_loss": 0.1780531257390976 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.09531199932098389, + "learning_rate": 7.009775436597804e-07, + "loss": 0.1397, + "step": 25638, + "teacher_loss": 0.14464889466762543 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.49762487411499023, + "learning_rate": 7.002915051498132e-07, + "loss": 0.1918, + "step": 25639, + "teacher_loss": 0.15777619183063507 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.8030431866645813, + "learning_rate": 6.99605794489292e-07, + "loss": 0.2045, + "step": 25640, + "teacher_loss": 0.1380133032798767 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.8867901563644409, + "learning_rate": 6.989204116939324e-07, + "loss": 0.2593, + "step": 25641, + "teacher_loss": 0.18958324193954468 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.4173845648765564, + "learning_rate": 6.982353567794503e-07, + "loss": 0.2097, + "step": 25642, + "teacher_loss": 0.18664461374282837 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.3869072496891022, + "learning_rate": 6.975506297615547e-07, + "loss": 0.2059, + "step": 25643, + "teacher_loss": 0.18580420315265656 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.31851133704185486, + "learning_rate": 6.968662306559398e-07, + "loss": 0.2094, + "step": 25644, + "teacher_loss": 0.19726628065109253 + }, + { + "compression_loss": 0.0, + "epoch": 4.63, + "label_loss": 0.11432275176048279, + "learning_rate": 6.96182159478303e-07, + "loss": 0.1876, + "step": 25645, + "teacher_loss": 0.19571228325366974 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.7669801712036133, + "learning_rate": 6.954984162443201e-07, + "loss": 0.2468, + "step": 25646, + "teacher_loss": 0.18897007405757904 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.2976863384246826, + "learning_rate": 6.948150009696736e-07, + "loss": 0.1836, + "step": 25647, + "teacher_loss": 0.17088714241981506 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.3376414179801941, + "learning_rate": 6.941319136700292e-07, + "loss": 0.1776, + "step": 25648, + "teacher_loss": 0.15978124737739563 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.30033180117607117, + "learning_rate": 6.934491543610494e-07, + "loss": 0.1745, + "step": 25649, + "teacher_loss": 0.16054022312164307 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.6786854267120361, + "learning_rate": 6.927667230583851e-07, + "loss": 0.3449, + "step": 25650, + "teacher_loss": 0.3078462481498718 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.15645639598369598, + "learning_rate": 6.920846197776887e-07, + "loss": 0.204, + "step": 25651, + "teacher_loss": 0.20927633345127106 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.5199425220489502, + "learning_rate": 6.91402844534591e-07, + "loss": 0.2192, + "step": 25652, + "teacher_loss": 0.1858280599117279 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.46262991428375244, + "learning_rate": 6.907213973447279e-07, + "loss": 0.2432, + "step": 25653, + "teacher_loss": 0.2187812477350235 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.41090065240859985, + "learning_rate": 6.90040278223722e-07, + "loss": 0.1812, + "step": 25654, + "teacher_loss": 0.155717134475708 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.2709306478500366, + "learning_rate": 6.893594871871889e-07, + "loss": 0.1455, + "step": 25655, + "teacher_loss": 0.131536602973938 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.3858177661895752, + "learning_rate": 6.88679024250738e-07, + "loss": 0.1804, + "step": 25656, + "teacher_loss": 0.15756326913833618 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.23808319866657257, + "learning_rate": 6.879988894299682e-07, + "loss": 0.1945, + "step": 25657, + "teacher_loss": 0.18960733711719513 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.3815857172012329, + "learning_rate": 6.873190827404757e-07, + "loss": 0.1871, + "step": 25658, + "teacher_loss": 0.1655081808567047 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.43557536602020264, + "learning_rate": 6.866396041978429e-07, + "loss": 0.2346, + "step": 25659, + "teacher_loss": 0.21221569180488586 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.5912463665008545, + "learning_rate": 6.859604538176506e-07, + "loss": 0.2836, + "step": 25660, + "teacher_loss": 0.2494516223669052 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.47608721256256104, + "learning_rate": 6.852816316154714e-07, + "loss": 0.1997, + "step": 25661, + "teacher_loss": 0.1689501702785492 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.3079376518726349, + "learning_rate": 6.846031376068645e-07, + "loss": 0.1617, + "step": 25662, + "teacher_loss": 0.1454661339521408 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.35969436168670654, + "learning_rate": 6.839249718073875e-07, + "loss": 0.1552, + "step": 25663, + "teacher_loss": 0.13246308267116547 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.40328487753868103, + "learning_rate": 6.83247134232593e-07, + "loss": 0.2761, + "step": 25664, + "teacher_loss": 0.2619214653968811 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.2900179624557495, + "learning_rate": 6.825696248980135e-07, + "loss": 0.1598, + "step": 25665, + "teacher_loss": 0.14527863264083862 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.4816819429397583, + "learning_rate": 6.818924438191881e-07, + "loss": 0.2588, + "step": 25666, + "teacher_loss": 0.23401620984077454 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.16003361344337463, + "learning_rate": 6.812155910116429e-07, + "loss": 0.174, + "step": 25667, + "teacher_loss": 0.17555996775627136 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.24029219150543213, + "learning_rate": 6.80539066490894e-07, + "loss": 0.2021, + "step": 25668, + "teacher_loss": 0.19783999025821686 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.7034240961074829, + "learning_rate": 6.798628702724469e-07, + "loss": 0.2927, + "step": 25669, + "teacher_loss": 0.24701057374477386 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.16648727655410767, + "learning_rate": 6.791870023718161e-07, + "loss": 0.1135, + "step": 25670, + "teacher_loss": 0.10763737559318542 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.6138750314712524, + "learning_rate": 6.785114628044908e-07, + "loss": 0.2036, + "step": 25671, + "teacher_loss": 0.15796923637390137 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.06280869990587234, + "learning_rate": 6.778362515859554e-07, + "loss": 0.1131, + "step": 25672, + "teacher_loss": 0.1187053918838501 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.5124033689498901, + "learning_rate": 6.771613687316991e-07, + "loss": 0.2114, + "step": 25673, + "teacher_loss": 0.17797735333442688 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.23839521408081055, + "learning_rate": 6.764868142571895e-07, + "loss": 0.2266, + "step": 25674, + "teacher_loss": 0.2252405732870102 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.6884697675704956, + "learning_rate": 6.758125881778926e-07, + "loss": 0.3043, + "step": 25675, + "teacher_loss": 0.2615863084793091 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.4559873640537262, + "learning_rate": 6.75138690509266e-07, + "loss": 0.2664, + "step": 25676, + "teacher_loss": 0.24529647827148438 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.19168272614479065, + "learning_rate": 6.744651212667624e-07, + "loss": 0.1591, + "step": 25677, + "teacher_loss": 0.15546078979969025 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.5121338367462158, + "learning_rate": 6.737918804658227e-07, + "loss": 0.2452, + "step": 25678, + "teacher_loss": 0.2155478298664093 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.6976661682128906, + "learning_rate": 6.73118968121883e-07, + "loss": 0.2138, + "step": 25679, + "teacher_loss": 0.1600068062543869 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.6152940988540649, + "learning_rate": 6.724463842503725e-07, + "loss": 0.2473, + "step": 25680, + "teacher_loss": 0.2064152956008911 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.3929380774497986, + "learning_rate": 6.717741288667106e-07, + "loss": 0.2205, + "step": 25681, + "teacher_loss": 0.20134258270263672 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.9830864667892456, + "learning_rate": 6.71102201986305e-07, + "loss": 0.3438, + "step": 25682, + "teacher_loss": 0.2727489471435547 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.7680139541625977, + "learning_rate": 6.7043060362457e-07, + "loss": 0.204, + "step": 25683, + "teacher_loss": 0.14131270349025726 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.5719360113143921, + "learning_rate": 6.697593337968982e-07, + "loss": 0.3337, + "step": 25684, + "teacher_loss": 0.3072816729545593 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.7995215058326721, + "learning_rate": 6.690883925186792e-07, + "loss": 0.2476, + "step": 25685, + "teacher_loss": 0.1862201690673828 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.5603985786437988, + "learning_rate": 6.68417779805297e-07, + "loss": 0.2464, + "step": 25686, + "teacher_loss": 0.2115139365196228 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.42832452058792114, + "learning_rate": 6.677474956721296e-07, + "loss": 0.2659, + "step": 25687, + "teacher_loss": 0.24789221584796906 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.3751183748245239, + "learning_rate": 6.670775401345397e-07, + "loss": 0.1863, + "step": 25688, + "teacher_loss": 0.16534754633903503 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.4386843740940094, + "learning_rate": 6.664079132078881e-07, + "loss": 0.2543, + "step": 25689, + "teacher_loss": 0.2337934672832489 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.27081501483917236, + "learning_rate": 6.657386149075328e-07, + "loss": 0.1694, + "step": 25690, + "teacher_loss": 0.1581096351146698 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.6064146757125854, + "learning_rate": 6.650696452488114e-07, + "loss": 0.368, + "step": 25691, + "teacher_loss": 0.3415566682815552 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.5103209018707275, + "learning_rate": 6.64401004247065e-07, + "loss": 0.2843, + "step": 25692, + "teacher_loss": 0.25915688276290894 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.29974669218063354, + "learning_rate": 6.637326919176246e-07, + "loss": 0.137, + "step": 25693, + "teacher_loss": 0.11896763741970062 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.7630175352096558, + "learning_rate": 6.630647082758095e-07, + "loss": 0.2738, + "step": 25694, + "teacher_loss": 0.21943911910057068 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.17620250582695007, + "learning_rate": 6.623970533369377e-07, + "loss": 0.1645, + "step": 25695, + "teacher_loss": 0.1632494032382965 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.26038858294487, + "learning_rate": 6.617297271163153e-07, + "loss": 0.203, + "step": 25696, + "teacher_loss": 0.19657838344573975 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.7561020851135254, + "learning_rate": 6.610627296292415e-07, + "loss": 0.3138, + "step": 25697, + "teacher_loss": 0.26467224955558777 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.34898027777671814, + "learning_rate": 6.603960608910076e-07, + "loss": 0.2069, + "step": 25698, + "teacher_loss": 0.19112563133239746 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.3105284571647644, + "learning_rate": 6.597297209169012e-07, + "loss": 0.1863, + "step": 25699, + "teacher_loss": 0.1725083589553833 + }, + { + "compression_loss": 0.0, + "epoch": 4.64, + "label_loss": 0.33823779225349426, + "learning_rate": 6.590637097221985e-07, + "loss": 0.1797, + "step": 25700, + "teacher_loss": 0.16206462681293488 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.5531293749809265, + "learning_rate": 6.583980273221657e-07, + "loss": 0.2197, + "step": 25701, + "teacher_loss": 0.1826213002204895 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.634173572063446, + "learning_rate": 6.577326737320688e-07, + "loss": 0.2187, + "step": 25702, + "teacher_loss": 0.1725606620311737 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.5032634735107422, + "learning_rate": 6.57067648967164e-07, + "loss": 0.3513, + "step": 25703, + "teacher_loss": 0.33441758155822754 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.7262142896652222, + "learning_rate": 6.564029530426924e-07, + "loss": 0.3258, + "step": 25704, + "teacher_loss": 0.28128886222839355 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.27903062105178833, + "learning_rate": 6.557385859738985e-07, + "loss": 0.1565, + "step": 25705, + "teacher_loss": 0.14290419220924377 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.20355144143104553, + "learning_rate": 6.550745477760133e-07, + "loss": 0.1509, + "step": 25706, + "teacher_loss": 0.14506977796554565 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.30342260003089905, + "learning_rate": 6.544108384642583e-07, + "loss": 0.1726, + "step": 25707, + "teacher_loss": 0.15808308124542236 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.2158779799938202, + "learning_rate": 6.537474580538543e-07, + "loss": 0.2043, + "step": 25708, + "teacher_loss": 0.20299354195594788 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.4494536221027374, + "learning_rate": 6.530844065600078e-07, + "loss": 0.2172, + "step": 25709, + "teacher_loss": 0.19143524765968323 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.3886851668357849, + "learning_rate": 6.524216839979214e-07, + "loss": 0.1932, + "step": 25710, + "teacher_loss": 0.17144650220870972 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.31629908084869385, + "learning_rate": 6.517592903827896e-07, + "loss": 0.2398, + "step": 25711, + "teacher_loss": 0.23134204745292664 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.4288142919540405, + "learning_rate": 6.510972257297987e-07, + "loss": 0.2319, + "step": 25712, + "teacher_loss": 0.21001750230789185 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.32861459255218506, + "learning_rate": 6.504354900541282e-07, + "loss": 0.205, + "step": 25713, + "teacher_loss": 0.19124548137187958 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.22582606971263885, + "learning_rate": 6.497740833709509e-07, + "loss": 0.1728, + "step": 25714, + "teacher_loss": 0.16688984632492065 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.26213061809539795, + "learning_rate": 6.491130056954297e-07, + "loss": 0.2253, + "step": 25715, + "teacher_loss": 0.22125375270843506 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.25960099697113037, + "learning_rate": 6.484522570427193e-07, + "loss": 0.1948, + "step": 25716, + "teacher_loss": 0.1875881552696228 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.200932577252388, + "learning_rate": 6.477918374279723e-07, + "loss": 0.1764, + "step": 25717, + "teacher_loss": 0.17363235354423523 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.2921501398086548, + "learning_rate": 6.471317468663284e-07, + "loss": 0.2458, + "step": 25718, + "teacher_loss": 0.24060551822185516 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.45219215750694275, + "learning_rate": 6.464719853729206e-07, + "loss": 0.2282, + "step": 25719, + "teacher_loss": 0.20335917174816132 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.3557063937187195, + "learning_rate": 6.45812552962875e-07, + "loss": 0.2059, + "step": 25720, + "teacher_loss": 0.189256489276886 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.24168744683265686, + "learning_rate": 6.451534496513129e-07, + "loss": 0.1773, + "step": 25721, + "teacher_loss": 0.1701613962650299 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.3478865921497345, + "learning_rate": 6.444946754533438e-07, + "loss": 0.2043, + "step": 25722, + "teacher_loss": 0.1883271038532257 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.3882679343223572, + "learning_rate": 6.438362303840706e-07, + "loss": 0.1598, + "step": 25723, + "teacher_loss": 0.1343889832496643 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 1.0647556781768799, + "learning_rate": 6.43178114458593e-07, + "loss": 0.312, + "step": 25724, + "teacher_loss": 0.22836434841156006 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.39925098419189453, + "learning_rate": 6.425203276919956e-07, + "loss": 0.2356, + "step": 25725, + "teacher_loss": 0.21741333603858948 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.2608669698238373, + "learning_rate": 6.418628700993611e-07, + "loss": 0.1856, + "step": 25726, + "teacher_loss": 0.17718997597694397 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.29058313369750977, + "learning_rate": 6.412057416957645e-07, + "loss": 0.1787, + "step": 25727, + "teacher_loss": 0.16628967225551605 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.23646050691604614, + "learning_rate": 6.405489424962685e-07, + "loss": 0.1688, + "step": 25728, + "teacher_loss": 0.16126041114330292 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.30153656005859375, + "learning_rate": 6.398924725159328e-07, + "loss": 0.2615, + "step": 25729, + "teacher_loss": 0.2570296823978424 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.17028766870498657, + "learning_rate": 6.392363317698118e-07, + "loss": 0.2035, + "step": 25730, + "teacher_loss": 0.20723098516464233 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.4668811559677124, + "learning_rate": 6.385805202729455e-07, + "loss": 0.223, + "step": 25731, + "teacher_loss": 0.19588899612426758 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.6332976818084717, + "learning_rate": 6.379250380403667e-07, + "loss": 0.2293, + "step": 25732, + "teacher_loss": 0.18445730209350586 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.5107493996620178, + "learning_rate": 6.372698850871101e-07, + "loss": 0.4154, + "step": 25733, + "teacher_loss": 0.4048248529434204 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.3618752956390381, + "learning_rate": 6.366150614281934e-07, + "loss": 0.2644, + "step": 25734, + "teacher_loss": 0.2535882592201233 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.6782310009002686, + "learning_rate": 6.359605670786284e-07, + "loss": 0.2652, + "step": 25735, + "teacher_loss": 0.2192636877298355 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.41506946086883545, + "learning_rate": 6.35306402053421e-07, + "loss": 0.3022, + "step": 25736, + "teacher_loss": 0.2896573543548584 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.2939988076686859, + "learning_rate": 6.346525663675728e-07, + "loss": 0.1835, + "step": 25737, + "teacher_loss": 0.17123769223690033 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.30140218138694763, + "learning_rate": 6.339990600360701e-07, + "loss": 0.2192, + "step": 25738, + "teacher_loss": 0.21011903882026672 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.25520533323287964, + "learning_rate": 6.333458830738975e-07, + "loss": 0.2916, + "step": 25739, + "teacher_loss": 0.29566842317581177 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.3339117765426636, + "learning_rate": 6.326930354960314e-07, + "loss": 0.1804, + "step": 25740, + "teacher_loss": 0.16332292556762695 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.1938106268644333, + "learning_rate": 6.320405173174398e-07, + "loss": 0.199, + "step": 25741, + "teacher_loss": 0.19963189959526062 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.6745530366897583, + "learning_rate": 6.313883285530775e-07, + "loss": 0.3251, + "step": 25742, + "teacher_loss": 0.2862340807914734 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.3562083840370178, + "learning_rate": 6.307364692179041e-07, + "loss": 0.2187, + "step": 25743, + "teacher_loss": 0.2034008800983429 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.2091292142868042, + "learning_rate": 6.300849393268626e-07, + "loss": 0.1729, + "step": 25744, + "teacher_loss": 0.1689196527004242 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.4034806489944458, + "learning_rate": 6.294337388948895e-07, + "loss": 0.1921, + "step": 25745, + "teacher_loss": 0.16856998205184937 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.41607001423835754, + "learning_rate": 6.287828679369145e-07, + "loss": 0.2601, + "step": 25746, + "teacher_loss": 0.24279560148715973 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.13569307327270508, + "learning_rate": 6.28132326467864e-07, + "loss": 0.2127, + "step": 25747, + "teacher_loss": 0.2212495058774948 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.6997978091239929, + "learning_rate": 6.274821145026477e-07, + "loss": 0.2123, + "step": 25748, + "teacher_loss": 0.15816038846969604 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.3258850574493408, + "learning_rate": 6.268322320561753e-07, + "loss": 0.1814, + "step": 25749, + "teacher_loss": 0.1653926521539688 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.5280095338821411, + "learning_rate": 6.261826791433484e-07, + "loss": 0.239, + "step": 25750, + "teacher_loss": 0.20692920684814453 + }, + { + "epoch": 4.65, + "eval_exact_match": 80.5771050141911, + "eval_f1": 87.79693509641605, + "step": 25750 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.4342218339443207, + "learning_rate": 6.255334557790565e-07, + "loss": 0.2126, + "step": 25751, + "teacher_loss": 0.1880270391702652 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.25451648235321045, + "learning_rate": 6.248845619781862e-07, + "loss": 0.1481, + "step": 25752, + "teacher_loss": 0.13623470067977905 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.45443764328956604, + "learning_rate": 6.242359977556156e-07, + "loss": 0.2384, + "step": 25753, + "teacher_loss": 0.21434694528579712 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.3491644263267517, + "learning_rate": 6.235877631262093e-07, + "loss": 0.1862, + "step": 25754, + "teacher_loss": 0.16814668476581573 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.3918796181678772, + "learning_rate": 6.22939858104834e-07, + "loss": 0.2335, + "step": 25755, + "teacher_loss": 0.2158847451210022 + }, + { + "compression_loss": 0.0, + "epoch": 4.65, + "label_loss": 0.20084181427955627, + "learning_rate": 6.22292282706346e-07, + "loss": 0.1867, + "step": 25756, + "teacher_loss": 0.18509957194328308 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.4287026822566986, + "learning_rate": 6.216450369455867e-07, + "loss": 0.2106, + "step": 25757, + "teacher_loss": 0.18631555140018463 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.8670519590377808, + "learning_rate": 6.209981208373993e-07, + "loss": 0.2976, + "step": 25758, + "teacher_loss": 0.23430058360099792 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.24358630180358887, + "learning_rate": 6.203515343966137e-07, + "loss": 0.1545, + "step": 25759, + "teacher_loss": 0.14460918307304382 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.2879866361618042, + "learning_rate": 6.197052776380563e-07, + "loss": 0.1686, + "step": 25760, + "teacher_loss": 0.15537038445472717 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.24190539121627808, + "learning_rate": 6.190593505765401e-07, + "loss": 0.1817, + "step": 25761, + "teacher_loss": 0.17497220635414124 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.558539092540741, + "learning_rate": 6.184137532268769e-07, + "loss": 0.3034, + "step": 25762, + "teacher_loss": 0.27502989768981934 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.39625072479248047, + "learning_rate": 6.177684856038712e-07, + "loss": 0.2025, + "step": 25763, + "teacher_loss": 0.18094715476036072 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.29281866550445557, + "learning_rate": 6.171235477223114e-07, + "loss": 0.1585, + "step": 25764, + "teacher_loss": 0.14356368780136108 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.4449898302555084, + "learning_rate": 6.164789395969855e-07, + "loss": 0.1826, + "step": 25765, + "teacher_loss": 0.15349453687667847 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.3851661682128906, + "learning_rate": 6.158346612426769e-07, + "loss": 0.1965, + "step": 25766, + "teacher_loss": 0.17556919157505035 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.20041537284851074, + "learning_rate": 6.151907126741502e-07, + "loss": 0.178, + "step": 25767, + "teacher_loss": 0.1755596399307251 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.9967182874679565, + "learning_rate": 6.145470939061754e-07, + "loss": 0.697, + "step": 25768, + "teacher_loss": 0.6636947393417358 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.24394464492797852, + "learning_rate": 6.139038049535039e-07, + "loss": 0.1818, + "step": 25769, + "teacher_loss": 0.1748485267162323 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.6486415863037109, + "learning_rate": 6.132608458308875e-07, + "loss": 0.2324, + "step": 25770, + "teacher_loss": 0.18617627024650574 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.8195672035217285, + "learning_rate": 6.126182165530658e-07, + "loss": 0.2712, + "step": 25771, + "teacher_loss": 0.21029126644134521 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.17748457193374634, + "learning_rate": 6.119759171347722e-07, + "loss": 0.1706, + "step": 25772, + "teacher_loss": 0.1698291152715683 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.5725502967834473, + "learning_rate": 6.113339475907331e-07, + "loss": 0.1868, + "step": 25773, + "teacher_loss": 0.14397728443145752 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.142591655254364, + "learning_rate": 6.106923079356703e-07, + "loss": 0.1733, + "step": 25774, + "teacher_loss": 0.1767416149377823 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.3679741322994232, + "learning_rate": 6.100509981842883e-07, + "loss": 0.3127, + "step": 25775, + "teacher_loss": 0.3065851926803589 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.27976611256599426, + "learning_rate": 6.094100183512924e-07, + "loss": 0.1878, + "step": 25776, + "teacher_loss": 0.17762205004692078 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.5629473924636841, + "learning_rate": 6.08769368451384e-07, + "loss": 0.2225, + "step": 25777, + "teacher_loss": 0.18463751673698425 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.2546696662902832, + "learning_rate": 6.08129048499243e-07, + "loss": 0.199, + "step": 25778, + "teacher_loss": 0.19276180863380432 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.5035284161567688, + "learning_rate": 6.074890585095544e-07, + "loss": 0.2043, + "step": 25779, + "teacher_loss": 0.17109665274620056 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.4446781277656555, + "learning_rate": 6.068493984969931e-07, + "loss": 0.2318, + "step": 25780, + "teacher_loss": 0.20811288058757782 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.31238439679145813, + "learning_rate": 6.062100684762223e-07, + "loss": 0.2012, + "step": 25781, + "teacher_loss": 0.18881163001060486 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.22814130783081055, + "learning_rate": 6.055710684618971e-07, + "loss": 0.1909, + "step": 25782, + "teacher_loss": 0.18673820793628693 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.11562801897525787, + "learning_rate": 6.049323984686706e-07, + "loss": 0.1475, + "step": 25783, + "teacher_loss": 0.1510196030139923 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 1.1436388492584229, + "learning_rate": 6.042940585111878e-07, + "loss": 0.4129, + "step": 25784, + "teacher_loss": 0.33171939849853516 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.48620516061782837, + "learning_rate": 6.036560486040805e-07, + "loss": 0.2535, + "step": 25785, + "teacher_loss": 0.22760051488876343 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.28465378284454346, + "learning_rate": 6.030183687619767e-07, + "loss": 0.1578, + "step": 25786, + "teacher_loss": 0.14365479350090027 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.19246599078178406, + "learning_rate": 6.023810189994983e-07, + "loss": 0.1458, + "step": 25787, + "teacher_loss": 0.14062535762786865 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.7967947721481323, + "learning_rate": 6.017439993312568e-07, + "loss": 0.2367, + "step": 25788, + "teacher_loss": 0.17451095581054688 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.3969930410385132, + "learning_rate": 6.011073097718556e-07, + "loss": 0.2657, + "step": 25789, + "teacher_loss": 0.25114157795906067 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.41787639260292053, + "learning_rate": 6.004709503358963e-07, + "loss": 0.1738, + "step": 25790, + "teacher_loss": 0.14666375517845154 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.2856449782848358, + "learning_rate": 5.998349210379656e-07, + "loss": 0.1764, + "step": 25791, + "teacher_loss": 0.16425052285194397 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.29506975412368774, + "learning_rate": 5.991992218926434e-07, + "loss": 0.2261, + "step": 25792, + "teacher_loss": 0.21844318509101868 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.24840043485164642, + "learning_rate": 5.985638529145115e-07, + "loss": 0.1918, + "step": 25793, + "teacher_loss": 0.18555346131324768 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.39574193954467773, + "learning_rate": 5.979288141181316e-07, + "loss": 0.2516, + "step": 25794, + "teacher_loss": 0.2356257438659668 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.7872180938720703, + "learning_rate": 5.972941055180603e-07, + "loss": 0.2333, + "step": 25795, + "teacher_loss": 0.17171096801757812 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.7270216941833496, + "learning_rate": 5.966597271288576e-07, + "loss": 0.2162, + "step": 25796, + "teacher_loss": 0.15946140885353088 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.32171833515167236, + "learning_rate": 5.960256789650637e-07, + "loss": 0.1657, + "step": 25797, + "teacher_loss": 0.148350328207016 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.3438599109649658, + "learning_rate": 5.95391961041215e-07, + "loss": 0.2084, + "step": 25798, + "teacher_loss": 0.19338008761405945 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.49039924144744873, + "learning_rate": 5.947585733718402e-07, + "loss": 0.2439, + "step": 25799, + "teacher_loss": 0.21648633480072021 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.24343052506446838, + "learning_rate": 5.941255159714643e-07, + "loss": 0.188, + "step": 25800, + "teacher_loss": 0.18182893097400665 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.6688398122787476, + "learning_rate": 5.934927888545972e-07, + "loss": 0.2455, + "step": 25801, + "teacher_loss": 0.19842402637004852 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.46803441643714905, + "learning_rate": 5.928603920357473e-07, + "loss": 0.2016, + "step": 25802, + "teacher_loss": 0.1720259040594101 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.8841602802276611, + "learning_rate": 5.922283255294164e-07, + "loss": 0.2648, + "step": 25803, + "teacher_loss": 0.19603434205055237 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.264207661151886, + "learning_rate": 5.91596589350093e-07, + "loss": 0.1601, + "step": 25804, + "teacher_loss": 0.14856381714344025 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.4181753993034363, + "learning_rate": 5.90965183512257e-07, + "loss": 0.1864, + "step": 25805, + "teacher_loss": 0.16061249375343323 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.3414986729621887, + "learning_rate": 5.903341080303937e-07, + "loss": 0.2045, + "step": 25806, + "teacher_loss": 0.18928295373916626 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.29306602478027344, + "learning_rate": 5.897033629189646e-07, + "loss": 0.2084, + "step": 25807, + "teacher_loss": 0.19904470443725586 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.4902295768260956, + "learning_rate": 5.890729481924334e-07, + "loss": 0.2712, + "step": 25808, + "teacher_loss": 0.2468898892402649 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.38052690029144287, + "learning_rate": 5.884428638652534e-07, + "loss": 0.2066, + "step": 25809, + "teacher_loss": 0.18727847933769226 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.09350591897964478, + "learning_rate": 5.878131099518713e-07, + "loss": 0.1806, + "step": 25810, + "teacher_loss": 0.19027632474899292 + }, + { + "compression_loss": 0.0, + "epoch": 4.66, + "label_loss": 0.8894171118736267, + "learning_rate": 5.871836864667224e-07, + "loss": 0.3266, + "step": 25811, + "teacher_loss": 0.26401466131210327 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.33329227566719055, + "learning_rate": 5.8655459342424e-07, + "loss": 0.2228, + "step": 25812, + "teacher_loss": 0.21049004793167114 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.21610203385353088, + "learning_rate": 5.859258308388493e-07, + "loss": 0.192, + "step": 25813, + "teacher_loss": 0.1893693506717682 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 1.2249739170074463, + "learning_rate": 5.852973987249622e-07, + "loss": 0.2797, + "step": 25814, + "teacher_loss": 0.17469385266304016 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.6751925945281982, + "learning_rate": 5.846692970969869e-07, + "loss": 0.2552, + "step": 25815, + "teacher_loss": 0.20848111808300018 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.3831615447998047, + "learning_rate": 5.840415259693271e-07, + "loss": 0.2966, + "step": 25816, + "teacher_loss": 0.2869266867637634 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.6506332755088806, + "learning_rate": 5.83414085356373e-07, + "loss": 0.2634, + "step": 25817, + "teacher_loss": 0.22037768363952637 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.5384520888328552, + "learning_rate": 5.827869752725129e-07, + "loss": 0.1952, + "step": 25818, + "teacher_loss": 0.15702220797538757 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.21663898229599, + "learning_rate": 5.821601957321205e-07, + "loss": 0.1718, + "step": 25819, + "teacher_loss": 0.1668156087398529 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.3887439966201782, + "learning_rate": 5.815337467495674e-07, + "loss": 0.206, + "step": 25820, + "teacher_loss": 0.18573185801506042 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.16601096093654633, + "learning_rate": 5.809076283392173e-07, + "loss": 0.1483, + "step": 25821, + "teacher_loss": 0.14637970924377441 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.22529301047325134, + "learning_rate": 5.802818405154236e-07, + "loss": 0.176, + "step": 25822, + "teacher_loss": 0.17051096260547638 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.6318166255950928, + "learning_rate": 5.796563832925384e-07, + "loss": 0.2397, + "step": 25823, + "teacher_loss": 0.19617262482643127 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.2434415966272354, + "learning_rate": 5.790312566848932e-07, + "loss": 0.254, + "step": 25824, + "teacher_loss": 0.2551822364330292 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.4386853277683258, + "learning_rate": 5.784064607068268e-07, + "loss": 0.2015, + "step": 25825, + "teacher_loss": 0.17517027258872986 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.2766663432121277, + "learning_rate": 5.777819953726643e-07, + "loss": 0.1875, + "step": 25826, + "teacher_loss": 0.1776140034198761 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.30967405438423157, + "learning_rate": 5.771578606967176e-07, + "loss": 0.1982, + "step": 25827, + "teacher_loss": 0.18580153584480286 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.40260255336761475, + "learning_rate": 5.765340566932986e-07, + "loss": 0.2086, + "step": 25828, + "teacher_loss": 0.18705829977989197 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.5702332258224487, + "learning_rate": 5.759105833767125e-07, + "loss": 0.2061, + "step": 25829, + "teacher_loss": 0.1656341254711151 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.375188410282135, + "learning_rate": 5.752874407612496e-07, + "loss": 0.2271, + "step": 25830, + "teacher_loss": 0.21059849858283997 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.7497886419296265, + "learning_rate": 5.746646288612001e-07, + "loss": 0.2201, + "step": 25831, + "teacher_loss": 0.16120293736457825 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.4084872603416443, + "learning_rate": 5.740421476908391e-07, + "loss": 0.2104, + "step": 25832, + "teacher_loss": 0.18836282193660736 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.2332880198955536, + "learning_rate": 5.734199972644405e-07, + "loss": 0.1626, + "step": 25833, + "teacher_loss": 0.1547866016626358 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.33135986328125, + "learning_rate": 5.72798177596271e-07, + "loss": 0.1766, + "step": 25834, + "teacher_loss": 0.1593727171421051 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.2378404438495636, + "learning_rate": 5.721766887005808e-07, + "loss": 0.2245, + "step": 25835, + "teacher_loss": 0.22299891710281372 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.49611324071884155, + "learning_rate": 5.71555530591622e-07, + "loss": 0.2316, + "step": 25836, + "teacher_loss": 0.202169269323349 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.46056610345840454, + "learning_rate": 5.709347032836398e-07, + "loss": 0.2216, + "step": 25837, + "teacher_loss": 0.19508647918701172 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.5267481803894043, + "learning_rate": 5.703142067908613e-07, + "loss": 0.207, + "step": 25838, + "teacher_loss": 0.17151404917240143 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.3611549437046051, + "learning_rate": 5.696940411275165e-07, + "loss": 0.2579, + "step": 25839, + "teacher_loss": 0.24646760523319244 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.17160308361053467, + "learning_rate": 5.690742063078242e-07, + "loss": 0.2163, + "step": 25840, + "teacher_loss": 0.2213120311498642 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.32062798738479614, + "learning_rate": 5.68454702345993e-07, + "loss": 0.2008, + "step": 25841, + "teacher_loss": 0.18744905292987823 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.317829430103302, + "learning_rate": 5.678355292562248e-07, + "loss": 0.2204, + "step": 25842, + "teacher_loss": 0.20954085886478424 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.24208903312683105, + "learning_rate": 5.672166870527235e-07, + "loss": 0.1942, + "step": 25843, + "teacher_loss": 0.18889720737934113 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.3107760548591614, + "learning_rate": 5.665981757496691e-07, + "loss": 0.1781, + "step": 25844, + "teacher_loss": 0.1633348912000656 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.990404486656189, + "learning_rate": 5.659799953612438e-07, + "loss": 0.3312, + "step": 25845, + "teacher_loss": 0.25790154933929443 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.2966078519821167, + "learning_rate": 5.653621459016229e-07, + "loss": 0.1971, + "step": 25846, + "teacher_loss": 0.18606823682785034 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.36667609214782715, + "learning_rate": 5.647446273849716e-07, + "loss": 0.2097, + "step": 25847, + "teacher_loss": 0.19222337007522583 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.36348435282707214, + "learning_rate": 5.641274398254454e-07, + "loss": 0.2474, + "step": 25848, + "teacher_loss": 0.23451447486877441 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.4853397011756897, + "learning_rate": 5.635105832371962e-07, + "loss": 0.2359, + "step": 25849, + "teacher_loss": 0.20815902948379517 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.6518690586090088, + "learning_rate": 5.628940576343677e-07, + "loss": 0.2333, + "step": 25850, + "teacher_loss": 0.18679219484329224 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.8329676389694214, + "learning_rate": 5.622778630310921e-07, + "loss": 0.2228, + "step": 25851, + "teacher_loss": 0.15503638982772827 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.9448129534721375, + "learning_rate": 5.616619994414996e-07, + "loss": 0.3441, + "step": 25852, + "teacher_loss": 0.2773763835430145 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.5836213827133179, + "learning_rate": 5.610464668797088e-07, + "loss": 0.2321, + "step": 25853, + "teacher_loss": 0.1930159032344818 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.41513097286224365, + "learning_rate": 5.604312653598337e-07, + "loss": 0.2477, + "step": 25854, + "teacher_loss": 0.22908906638622284 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.6520373821258545, + "learning_rate": 5.598163948959745e-07, + "loss": 0.2191, + "step": 25855, + "teacher_loss": 0.17097894847393036 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.2728990912437439, + "learning_rate": 5.592018555022332e-07, + "loss": 0.2029, + "step": 25856, + "teacher_loss": 0.1950836181640625 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.9071080088615417, + "learning_rate": 5.585876471927004e-07, + "loss": 0.3273, + "step": 25857, + "teacher_loss": 0.2628253400325775 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.5643683671951294, + "learning_rate": 5.579737699814513e-07, + "loss": 0.2332, + "step": 25858, + "teacher_loss": 0.19640018045902252 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.621512770652771, + "learning_rate": 5.573602238825648e-07, + "loss": 0.2175, + "step": 25859, + "teacher_loss": 0.17263853549957275 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.42945218086242676, + "learning_rate": 5.567470089101078e-07, + "loss": 0.2696, + "step": 25860, + "teacher_loss": 0.2518458366394043 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.20666979253292084, + "learning_rate": 5.561341250781393e-07, + "loss": 0.1899, + "step": 25861, + "teacher_loss": 0.18803198635578156 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.22259894013404846, + "learning_rate": 5.555215724007079e-07, + "loss": 0.1907, + "step": 25862, + "teacher_loss": 0.18710389733314514 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.6280340552330017, + "learning_rate": 5.54909350891864e-07, + "loss": 0.2556, + "step": 25863, + "teacher_loss": 0.21424134075641632 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.49876752495765686, + "learning_rate": 5.542974605656382e-07, + "loss": 0.2816, + "step": 25864, + "teacher_loss": 0.2574690878391266 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.2558981776237488, + "learning_rate": 5.53685901436059e-07, + "loss": 0.1591, + "step": 25865, + "teacher_loss": 0.1483120322227478 + }, + { + "compression_loss": 0.0, + "epoch": 4.67, + "label_loss": 0.3983690142631531, + "learning_rate": 5.530746735171521e-07, + "loss": 0.1658, + "step": 25866, + "teacher_loss": 0.13998062908649445 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.3608073592185974, + "learning_rate": 5.524637768229296e-07, + "loss": 0.3361, + "step": 25867, + "teacher_loss": 0.3333306908607483 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.6249392032623291, + "learning_rate": 5.518532113673952e-07, + "loss": 0.2665, + "step": 25868, + "teacher_loss": 0.22670280933380127 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.4085375666618347, + "learning_rate": 5.512429771645477e-07, + "loss": 0.2039, + "step": 25869, + "teacher_loss": 0.1812104433774948 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.40966761112213135, + "learning_rate": 5.506330742283827e-07, + "loss": 0.2384, + "step": 25870, + "teacher_loss": 0.2193642556667328 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.5498589277267456, + "learning_rate": 5.500235025728772e-07, + "loss": 0.199, + "step": 25871, + "teacher_loss": 0.16002202033996582 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.5666399598121643, + "learning_rate": 5.494142622120085e-07, + "loss": 0.2975, + "step": 25872, + "teacher_loss": 0.26759302616119385 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.6202941536903381, + "learning_rate": 5.488053531597487e-07, + "loss": 0.2412, + "step": 25873, + "teacher_loss": 0.19907745718955994 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 1.4289371967315674, + "learning_rate": 5.481967754300532e-07, + "loss": 0.4802, + "step": 25874, + "teacher_loss": 0.3747596740722656 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.32796916365623474, + "learning_rate": 5.47588529036876e-07, + "loss": 0.1842, + "step": 25875, + "teacher_loss": 0.16820621490478516 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.5980788469314575, + "learning_rate": 5.469806139941657e-07, + "loss": 0.3136, + "step": 25876, + "teacher_loss": 0.28204143047332764 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.33545443415641785, + "learning_rate": 5.463730303158565e-07, + "loss": 0.176, + "step": 25877, + "teacher_loss": 0.1582593470811844 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.3517675995826721, + "learning_rate": 5.457657780158787e-07, + "loss": 0.2267, + "step": 25878, + "teacher_loss": 0.21277594566345215 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.5439707040786743, + "learning_rate": 5.451588571081579e-07, + "loss": 0.2126, + "step": 25879, + "teacher_loss": 0.17580318450927734 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.5084367394447327, + "learning_rate": 5.445522676066045e-07, + "loss": 0.1918, + "step": 25880, + "teacher_loss": 0.15664368867874146 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.6170657277107239, + "learning_rate": 5.439460095251292e-07, + "loss": 0.224, + "step": 25881, + "teacher_loss": 0.18032225966453552 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.30646902322769165, + "learning_rate": 5.433400828776291e-07, + "loss": 0.1885, + "step": 25882, + "teacher_loss": 0.17540176212787628 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.5375478267669678, + "learning_rate": 5.427344876779966e-07, + "loss": 0.2198, + "step": 25883, + "teacher_loss": 0.18446362018585205 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.46183329820632935, + "learning_rate": 5.421292239401205e-07, + "loss": 0.2322, + "step": 25884, + "teacher_loss": 0.20667783915996552 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.42828792333602905, + "learning_rate": 5.415242916778729e-07, + "loss": 0.2164, + "step": 25885, + "teacher_loss": 0.19288897514343262 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.4741644859313965, + "learning_rate": 5.409196909051245e-07, + "loss": 0.2713, + "step": 25886, + "teacher_loss": 0.248794823884964 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.19874463975429535, + "learning_rate": 5.403154216357359e-07, + "loss": 0.154, + "step": 25887, + "teacher_loss": 0.14904123544692993 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.3233036696910858, + "learning_rate": 5.397114838835627e-07, + "loss": 0.1596, + "step": 25888, + "teacher_loss": 0.14137771725654602 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.4941243529319763, + "learning_rate": 5.391078776624519e-07, + "loss": 0.2258, + "step": 25889, + "teacher_loss": 0.19601207971572876 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.3330090343952179, + "learning_rate": 5.385046029862412e-07, + "loss": 0.1743, + "step": 25890, + "teacher_loss": 0.15662416815757751 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.1532750129699707, + "learning_rate": 5.379016598687625e-07, + "loss": 0.192, + "step": 25891, + "teacher_loss": 0.1963367760181427 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 1.2537908554077148, + "learning_rate": 5.372990483238382e-07, + "loss": 0.3827, + "step": 25892, + "teacher_loss": 0.2858789563179016 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.7413392066955566, + "learning_rate": 5.366967683652857e-07, + "loss": 0.2431, + "step": 25893, + "teacher_loss": 0.18769536912441254 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.27084922790527344, + "learning_rate": 5.360948200069138e-07, + "loss": 0.1833, + "step": 25894, + "teacher_loss": 0.173615962266922 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.49576735496520996, + "learning_rate": 5.354932032625215e-07, + "loss": 0.1867, + "step": 25895, + "teacher_loss": 0.15232758224010468 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.3868037462234497, + "learning_rate": 5.348919181459028e-07, + "loss": 0.1668, + "step": 25896, + "teacher_loss": 0.1423090100288391 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.6711556911468506, + "learning_rate": 5.34290964670845e-07, + "loss": 0.239, + "step": 25897, + "teacher_loss": 0.19097892940044403 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.47073009610176086, + "learning_rate": 5.336903428511236e-07, + "loss": 0.2727, + "step": 25898, + "teacher_loss": 0.2506440579891205 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.5025217533111572, + "learning_rate": 5.330900527005111e-07, + "loss": 0.1952, + "step": 25899, + "teacher_loss": 0.1610921323299408 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.46776843070983887, + "learning_rate": 5.324900942327715e-07, + "loss": 0.2487, + "step": 25900, + "teacher_loss": 0.22436915338039398 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.32462388277053833, + "learning_rate": 5.318904674616554e-07, + "loss": 0.2332, + "step": 25901, + "teacher_loss": 0.222994863986969 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.7252461314201355, + "learning_rate": 5.312911724009135e-07, + "loss": 0.2046, + "step": 25902, + "teacher_loss": 0.1467868685722351 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.5057038068771362, + "learning_rate": 5.30692209064288e-07, + "loss": 0.2614, + "step": 25903, + "teacher_loss": 0.23421171307563782 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.4243880808353424, + "learning_rate": 5.30093577465508e-07, + "loss": 0.1984, + "step": 25904, + "teacher_loss": 0.17333757877349854 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.29853904247283936, + "learning_rate": 5.294952776182976e-07, + "loss": 0.1986, + "step": 25905, + "teacher_loss": 0.18751922249794006 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.2802067995071411, + "learning_rate": 5.288973095363775e-07, + "loss": 0.2131, + "step": 25906, + "teacher_loss": 0.20567169785499573 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.3249923288822174, + "learning_rate": 5.28299673233455e-07, + "loss": 0.1695, + "step": 25907, + "teacher_loss": 0.15224722027778625 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.6771851181983948, + "learning_rate": 5.277023687232324e-07, + "loss": 0.2565, + "step": 25908, + "teacher_loss": 0.2097373753786087 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.0902882069349289, + "learning_rate": 5.271053960194022e-07, + "loss": 0.1744, + "step": 25909, + "teacher_loss": 0.1837047040462494 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.19433481991291046, + "learning_rate": 5.265087551356567e-07, + "loss": 0.1422, + "step": 25910, + "teacher_loss": 0.13642413914203644 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.24260929226875305, + "learning_rate": 5.259124460856701e-07, + "loss": 0.1859, + "step": 25911, + "teacher_loss": 0.1795925796031952 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.25381535291671753, + "learning_rate": 5.253164688831146e-07, + "loss": 0.191, + "step": 25912, + "teacher_loss": 0.1839832216501236 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.40698927640914917, + "learning_rate": 5.247208235416578e-07, + "loss": 0.2184, + "step": 25913, + "teacher_loss": 0.19747236371040344 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.24295800924301147, + "learning_rate": 5.24125510074952e-07, + "loss": 0.242, + "step": 25914, + "teacher_loss": 0.24186952412128448 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.7454047799110413, + "learning_rate": 5.235305284966446e-07, + "loss": 0.2314, + "step": 25915, + "teacher_loss": 0.1743362843990326 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.38935086131095886, + "learning_rate": 5.229358788203831e-07, + "loss": 0.1942, + "step": 25916, + "teacher_loss": 0.1725580096244812 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.19750481843948364, + "learning_rate": 5.223415610597981e-07, + "loss": 0.1343, + "step": 25917, + "teacher_loss": 0.12725582718849182 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.18662095069885254, + "learning_rate": 5.217475752285106e-07, + "loss": 0.1837, + "step": 25918, + "teacher_loss": 0.1834072321653366 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.4534686207771301, + "learning_rate": 5.211539213401462e-07, + "loss": 0.1539, + "step": 25919, + "teacher_loss": 0.12057413160800934 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.6859146356582642, + "learning_rate": 5.205605994083124e-07, + "loss": 0.2754, + "step": 25920, + "teacher_loss": 0.22978872060775757 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.2941237986087799, + "learning_rate": 5.199676094466116e-07, + "loss": 0.1949, + "step": 25921, + "teacher_loss": 0.18383683264255524 + }, + { + "compression_loss": 0.0, + "epoch": 4.68, + "label_loss": 0.27393823862075806, + "learning_rate": 5.193749514686397e-07, + "loss": 0.1796, + "step": 25922, + "teacher_loss": 0.169064462184906 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.27222585678100586, + "learning_rate": 5.187826254879874e-07, + "loss": 0.2022, + "step": 25923, + "teacher_loss": 0.19440282881259918 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.470312237739563, + "learning_rate": 5.181906315182289e-07, + "loss": 0.2323, + "step": 25924, + "teacher_loss": 0.20589923858642578 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.4206134080886841, + "learning_rate": 5.175989695729432e-07, + "loss": 0.2463, + "step": 25925, + "teacher_loss": 0.2269621193408966 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.5262861847877502, + "learning_rate": 5.170076396656931e-07, + "loss": 0.3077, + "step": 25926, + "teacher_loss": 0.2834537625312805 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.20970401167869568, + "learning_rate": 5.164166418100341e-07, + "loss": 0.1903, + "step": 25927, + "teacher_loss": 0.18818338215351105 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.51181960105896, + "learning_rate": 5.158259760195155e-07, + "loss": 0.2269, + "step": 25928, + "teacher_loss": 0.19521787762641907 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.3169773817062378, + "learning_rate": 5.152356423076848e-07, + "loss": 0.1959, + "step": 25929, + "teacher_loss": 0.18239985406398773 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.37351781129837036, + "learning_rate": 5.146456406880745e-07, + "loss": 0.2204, + "step": 25930, + "teacher_loss": 0.20340529084205627 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.7145411968231201, + "learning_rate": 5.140559711742071e-07, + "loss": 0.2744, + "step": 25931, + "teacher_loss": 0.2254788875579834 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.14445710182189941, + "learning_rate": 5.134666337796051e-07, + "loss": 0.2229, + "step": 25932, + "teacher_loss": 0.23160099983215332 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.5382006168365479, + "learning_rate": 5.128776285177827e-07, + "loss": 0.2505, + "step": 25933, + "teacher_loss": 0.21857401728630066 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.3074144423007965, + "learning_rate": 5.122889554022392e-07, + "loss": 0.2199, + "step": 25934, + "teacher_loss": 0.2101493775844574 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.45264333486557007, + "learning_rate": 5.117006144464736e-07, + "loss": 0.2588, + "step": 25935, + "teacher_loss": 0.23724156618118286 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.4242282211780548, + "learning_rate": 5.11112605663977e-07, + "loss": 0.2144, + "step": 25936, + "teacher_loss": 0.1910373866558075 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.1507379114627838, + "learning_rate": 5.105249290682267e-07, + "loss": 0.1449, + "step": 25937, + "teacher_loss": 0.14425115287303925 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.3932763934135437, + "learning_rate": 5.099375846726972e-07, + "loss": 0.2092, + "step": 25938, + "teacher_loss": 0.18879303336143494 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.19561371207237244, + "learning_rate": 5.093505724908576e-07, + "loss": 0.2192, + "step": 25939, + "teacher_loss": 0.22182133793830872 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.30664703249931335, + "learning_rate": 5.087638925361621e-07, + "loss": 0.169, + "step": 25940, + "teacher_loss": 0.15368971228599548 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 1.2652349472045898, + "learning_rate": 5.081775448220666e-07, + "loss": 0.2993, + "step": 25941, + "teacher_loss": 0.19198143482208252 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.4722893238067627, + "learning_rate": 5.075915293620087e-07, + "loss": 0.2134, + "step": 25942, + "teacher_loss": 0.18463854491710663 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.601287305355072, + "learning_rate": 5.070058461694261e-07, + "loss": 0.1649, + "step": 25943, + "teacher_loss": 0.11640842258930206 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.39461201429367065, + "learning_rate": 5.064204952577494e-07, + "loss": 0.2429, + "step": 25944, + "teacher_loss": 0.225990429520607 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.15408770740032196, + "learning_rate": 5.05835476640395e-07, + "loss": 0.1635, + "step": 25945, + "teacher_loss": 0.16453981399536133 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.4335036277770996, + "learning_rate": 5.052507903307785e-07, + "loss": 0.1624, + "step": 25946, + "teacher_loss": 0.13228052854537964 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.5900661945343018, + "learning_rate": 5.046664363423042e-07, + "loss": 0.2615, + "step": 25947, + "teacher_loss": 0.22496896982192993 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.6857079267501831, + "learning_rate": 5.040824146883665e-07, + "loss": 0.2572, + "step": 25948, + "teacher_loss": 0.20953483879566193 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.3605722188949585, + "learning_rate": 5.034987253823614e-07, + "loss": 0.173, + "step": 25949, + "teacher_loss": 0.15216027200222015 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.5704779624938965, + "learning_rate": 5.029153684376664e-07, + "loss": 0.278, + "step": 25950, + "teacher_loss": 0.24549362063407898 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.26498937606811523, + "learning_rate": 5.023323438676558e-07, + "loss": 0.197, + "step": 25951, + "teacher_loss": 0.1894627809524536 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.38061094284057617, + "learning_rate": 5.017496516857006e-07, + "loss": 0.1667, + "step": 25952, + "teacher_loss": 0.14294137060642242 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.2750745415687561, + "learning_rate": 5.011672919051569e-07, + "loss": 0.2452, + "step": 25953, + "teacher_loss": 0.24187231063842773 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.8193128108978271, + "learning_rate": 5.005852645393788e-07, + "loss": 0.3388, + "step": 25954, + "teacher_loss": 0.28544050455093384 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.3445749878883362, + "learning_rate": 5.000035696017074e-07, + "loss": 0.1799, + "step": 25955, + "teacher_loss": 0.16154810786247253 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.6730453372001648, + "learning_rate": 4.994222071054805e-07, + "loss": 0.3977, + "step": 25956, + "teacher_loss": 0.3670666217803955 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.3895450234413147, + "learning_rate": 4.988411770640306e-07, + "loss": 0.2199, + "step": 25957, + "teacher_loss": 0.2010941207408905 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.1975100040435791, + "learning_rate": 4.98260479490672e-07, + "loss": 0.1837, + "step": 25958, + "teacher_loss": 0.1822075992822647 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.347367525100708, + "learning_rate": 4.976801143987242e-07, + "loss": 0.239, + "step": 25959, + "teacher_loss": 0.2270033061504364 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.48298120498657227, + "learning_rate": 4.971000818014914e-07, + "loss": 0.2201, + "step": 25960, + "teacher_loss": 0.1908716857433319 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.8329836130142212, + "learning_rate": 4.965203817122699e-07, + "loss": 0.2998, + "step": 25961, + "teacher_loss": 0.24053703248500824 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.8319041728973389, + "learning_rate": 4.959410141443538e-07, + "loss": 0.2522, + "step": 25962, + "teacher_loss": 0.1877671480178833 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.6278356313705444, + "learning_rate": 4.953619791110242e-07, + "loss": 0.2482, + "step": 25963, + "teacher_loss": 0.20603244006633759 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.23842298984527588, + "learning_rate": 4.947832766255589e-07, + "loss": 0.2127, + "step": 25964, + "teacher_loss": 0.20988646149635315 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.7933729887008667, + "learning_rate": 4.942049067012205e-07, + "loss": 0.2218, + "step": 25965, + "teacher_loss": 0.15832099318504333 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.6896498203277588, + "learning_rate": 4.936268693512769e-07, + "loss": 0.246, + "step": 25966, + "teacher_loss": 0.19675500690937042 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.37444615364074707, + "learning_rate": 4.930491645889756e-07, + "loss": 0.1961, + "step": 25967, + "teacher_loss": 0.17629742622375488 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.48462975025177, + "learning_rate": 4.924717924275629e-07, + "loss": 0.2869, + "step": 25968, + "teacher_loss": 0.2649174928665161 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.12423699349164963, + "learning_rate": 4.918947528802748e-07, + "loss": 0.2151, + "step": 25969, + "teacher_loss": 0.22523370385169983 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.3906821012496948, + "learning_rate": 4.913180459603439e-07, + "loss": 0.2013, + "step": 25970, + "teacher_loss": 0.18030408024787903 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.4863739609718323, + "learning_rate": 4.907416716809898e-07, + "loss": 0.1923, + "step": 25971, + "teacher_loss": 0.15963782370090485 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.38046571612358093, + "learning_rate": 4.901656300554286e-07, + "loss": 0.2491, + "step": 25972, + "teacher_loss": 0.2345171868801117 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.1948980987071991, + "learning_rate": 4.89589921096868e-07, + "loss": 0.156, + "step": 25973, + "teacher_loss": 0.15168017148971558 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.7786871194839478, + "learning_rate": 4.890145448185041e-07, + "loss": 0.2363, + "step": 25974, + "teacher_loss": 0.17602795362472534 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.28241682052612305, + "learning_rate": 4.884395012335313e-07, + "loss": 0.2092, + "step": 25975, + "teacher_loss": 0.20102903246879578 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.7610867619514465, + "learning_rate": 4.878647903551342e-07, + "loss": 0.2401, + "step": 25976, + "teacher_loss": 0.18215903639793396 + }, + { + "compression_loss": 0.0, + "epoch": 4.69, + "label_loss": 0.37485358119010925, + "learning_rate": 4.872904121964872e-07, + "loss": 0.1723, + "step": 25977, + "teacher_loss": 0.14974729716777802 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.33419930934906006, + "learning_rate": 4.867163667707564e-07, + "loss": 0.1919, + "step": 25978, + "teacher_loss": 0.17606335878372192 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.4441710114479065, + "learning_rate": 4.861426540911095e-07, + "loss": 0.2016, + "step": 25979, + "teacher_loss": 0.17468340694904327 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.17039218544960022, + "learning_rate": 4.85569274170698e-07, + "loss": 0.1716, + "step": 25980, + "teacher_loss": 0.17173486948013306 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.20087842643260956, + "learning_rate": 4.849962270226644e-07, + "loss": 0.1825, + "step": 25981, + "teacher_loss": 0.18042081594467163 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.5726956725120544, + "learning_rate": 4.844235126601482e-07, + "loss": 0.2448, + "step": 25982, + "teacher_loss": 0.2083328664302826 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.41342154145240784, + "learning_rate": 4.838511310962823e-07, + "loss": 0.2378, + "step": 25983, + "teacher_loss": 0.21825474500656128 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.7882672548294067, + "learning_rate": 4.83279082344188e-07, + "loss": 0.245, + "step": 25984, + "teacher_loss": 0.1845826804637909 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.19746315479278564, + "learning_rate": 4.827073664169812e-07, + "loss": 0.1622, + "step": 25985, + "teacher_loss": 0.15832996368408203 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.3702055811882019, + "learning_rate": 4.8213598332777e-07, + "loss": 0.1977, + "step": 25986, + "teacher_loss": 0.17853793501853943 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.5302959680557251, + "learning_rate": 4.815649330896521e-07, + "loss": 0.2118, + "step": 25987, + "teacher_loss": 0.1764644831418991 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.15657463669776917, + "learning_rate": 4.809942157157221e-07, + "loss": 0.1356, + "step": 25988, + "teacher_loss": 0.1332722008228302 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.244287371635437, + "learning_rate": 4.804238312190662e-07, + "loss": 0.1509, + "step": 25989, + "teacher_loss": 0.14054623246192932 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.22543209791183472, + "learning_rate": 4.798537796127589e-07, + "loss": 0.1807, + "step": 25990, + "teacher_loss": 0.17573225498199463 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.342331200838089, + "learning_rate": 4.792840609098715e-07, + "loss": 0.2273, + "step": 25991, + "teacher_loss": 0.21451804041862488 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.6907057762145996, + "learning_rate": 4.787146751234634e-07, + "loss": 0.2637, + "step": 25992, + "teacher_loss": 0.2163037657737732 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.31875383853912354, + "learning_rate": 4.781456222665925e-07, + "loss": 0.1873, + "step": 25993, + "teacher_loss": 0.1726740002632141 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.4331517517566681, + "learning_rate": 4.775769023523019e-07, + "loss": 0.236, + "step": 25994, + "teacher_loss": 0.21414396166801453 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.48033493757247925, + "learning_rate": 4.770085153936326e-07, + "loss": 0.2239, + "step": 25995, + "teacher_loss": 0.19539423286914825 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.23985852301120758, + "learning_rate": 4.7644046140361774e-07, + "loss": 0.1422, + "step": 25996, + "teacher_loss": 0.1313505619764328 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.3673751652240753, + "learning_rate": 4.758727403952784e-07, + "loss": 0.1761, + "step": 25997, + "teacher_loss": 0.154887855052948 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.625605046749115, + "learning_rate": 4.7530535238163087e-07, + "loss": 0.2318, + "step": 25998, + "teacher_loss": 0.18803386390209198 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.4149981737136841, + "learning_rate": 4.747382973756847e-07, + "loss": 0.2519, + "step": 25999, + "teacher_loss": 0.23377925157546997 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.3353227376937866, + "learning_rate": 4.7417157539044133e-07, + "loss": 0.2534, + "step": 26000, + "teacher_loss": 0.2443416714668274 + }, + { + "epoch": 4.7, + "eval_exact_match": 80.51087984862819, + "eval_f1": 87.69368864892034, + "step": 26000 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.4277428388595581, + "learning_rate": 4.7360518643889015e-07, + "loss": 0.2453, + "step": 26001, + "teacher_loss": 0.2250259518623352 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.44357770681381226, + "learning_rate": 4.730391305340226e-07, + "loss": 0.2073, + "step": 26002, + "teacher_loss": 0.18109026551246643 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.27479088306427, + "learning_rate": 4.724734076888132e-07, + "loss": 0.1647, + "step": 26003, + "teacher_loss": 0.15249595046043396 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.42205357551574707, + "learning_rate": 4.7190801791623326e-07, + "loss": 0.1689, + "step": 26004, + "teacher_loss": 0.1408035159111023 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.7555962800979614, + "learning_rate": 4.7134296122924246e-07, + "loss": 0.2225, + "step": 26005, + "teacher_loss": 0.16331183910369873 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.23449909687042236, + "learning_rate": 4.7077823764080043e-07, + "loss": 0.2155, + "step": 26006, + "teacher_loss": 0.2133558839559555 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.3199350833892822, + "learning_rate": 4.702138471638534e-07, + "loss": 0.1913, + "step": 26007, + "teacher_loss": 0.17700400948524475 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.6562459468841553, + "learning_rate": 4.6964978981133776e-07, + "loss": 0.1941, + "step": 26008, + "teacher_loss": 0.14278317987918854 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.4295295476913452, + "learning_rate": 4.6908606559618985e-07, + "loss": 0.1941, + "step": 26009, + "teacher_loss": 0.16790884733200073 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.3958856463432312, + "learning_rate": 4.6852267453133257e-07, + "loss": 0.2367, + "step": 26010, + "teacher_loss": 0.21905523538589478 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.49395912885665894, + "learning_rate": 4.679596166296807e-07, + "loss": 0.3951, + "step": 26011, + "teacher_loss": 0.38416701555252075 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.38436219096183777, + "learning_rate": 4.673968919041488e-07, + "loss": 0.2148, + "step": 26012, + "teacher_loss": 0.19599804282188416 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.19209221005439758, + "learning_rate": 4.668345003676333e-07, + "loss": 0.1337, + "step": 26013, + "teacher_loss": 0.12716831266880035 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.3427913784980774, + "learning_rate": 4.6627244203303055e-07, + "loss": 0.1866, + "step": 26014, + "teacher_loss": 0.1692199558019638 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.12221767753362656, + "learning_rate": 4.657107169132252e-07, + "loss": 0.1393, + "step": 26015, + "teacher_loss": 0.14114607870578766 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.9144544005393982, + "learning_rate": 4.65149325021097e-07, + "loss": 0.2725, + "step": 26016, + "teacher_loss": 0.20118333399295807 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.17010632157325745, + "learning_rate": 4.645882663695189e-07, + "loss": 0.2034, + "step": 26017, + "teacher_loss": 0.20713108777999878 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.26602333784103394, + "learning_rate": 4.640275409713507e-07, + "loss": 0.1686, + "step": 26018, + "teacher_loss": 0.15772384405136108 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.34665191173553467, + "learning_rate": 4.634671488394504e-07, + "loss": 0.2201, + "step": 26019, + "teacher_loss": 0.20604264736175537 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.4486544728279114, + "learning_rate": 4.629070899866661e-07, + "loss": 0.1953, + "step": 26020, + "teacher_loss": 0.16715312004089355 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.4529132544994354, + "learning_rate": 4.623473644258375e-07, + "loss": 0.1911, + "step": 26021, + "teacher_loss": 0.1620243340730667 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.28592199087142944, + "learning_rate": 4.617879721697976e-07, + "loss": 0.1505, + "step": 26022, + "teacher_loss": 0.1354852020740509 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.177555114030838, + "learning_rate": 4.6122891323137127e-07, + "loss": 0.1726, + "step": 26023, + "teacher_loss": 0.1719955950975418 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.22664940357208252, + "learning_rate": 4.6067018762337654e-07, + "loss": 0.173, + "step": 26024, + "teacher_loss": 0.1670922040939331 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.5536371469497681, + "learning_rate": 4.601117953586231e-07, + "loss": 0.1714, + "step": 26025, + "teacher_loss": 0.12895441055297852 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.5180556774139404, + "learning_rate": 4.5955373644991585e-07, + "loss": 0.198, + "step": 26026, + "teacher_loss": 0.16249239444732666 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.15433071553707123, + "learning_rate": 4.589960109100444e-07, + "loss": 0.149, + "step": 26027, + "teacher_loss": 0.14841899275779724 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.47172582149505615, + "learning_rate": 4.5843861875179694e-07, + "loss": 0.1866, + "step": 26028, + "teacher_loss": 0.15489903092384338 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.6010135412216187, + "learning_rate": 4.5788155998795653e-07, + "loss": 0.2847, + "step": 26029, + "teacher_loss": 0.24952954053878784 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.34171411395072937, + "learning_rate": 4.5732483463129305e-07, + "loss": 0.141, + "step": 26030, + "teacher_loss": 0.11872003972530365 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.5686646699905396, + "learning_rate": 4.5676844269456786e-07, + "loss": 0.1879, + "step": 26031, + "teacher_loss": 0.14561530947685242 + }, + { + "compression_loss": 0.0, + "epoch": 4.7, + "label_loss": 0.2814827263355255, + "learning_rate": 4.562123841905391e-07, + "loss": 0.1975, + "step": 26032, + "teacher_loss": 0.18817178905010223 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.2973589599132538, + "learning_rate": 4.556566591319583e-07, + "loss": 0.1311, + "step": 26033, + "teacher_loss": 0.11266306787729263 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.6057367324829102, + "learning_rate": 4.551012675315619e-07, + "loss": 0.2224, + "step": 26034, + "teacher_loss": 0.17985178530216217 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.08601963520050049, + "learning_rate": 4.5454620940208634e-07, + "loss": 0.1304, + "step": 26035, + "teacher_loss": 0.13536955416202545 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.7770828604698181, + "learning_rate": 4.5399148475625816e-07, + "loss": 0.2458, + "step": 26036, + "teacher_loss": 0.1867215633392334 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.20638640224933624, + "learning_rate": 4.534370936067922e-07, + "loss": 0.123, + "step": 26037, + "teacher_loss": 0.11370199918746948 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.3812705874443054, + "learning_rate": 4.528830359663999e-07, + "loss": 0.2054, + "step": 26038, + "teacher_loss": 0.18582221865653992 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.48597636818885803, + "learning_rate": 4.5232931184778783e-07, + "loss": 0.2691, + "step": 26039, + "teacher_loss": 0.2450440376996994 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.35701578855514526, + "learning_rate": 4.5177592126364744e-07, + "loss": 0.2012, + "step": 26040, + "teacher_loss": 0.18386715650558472 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.2949666380882263, + "learning_rate": 4.512228642266653e-07, + "loss": 0.2485, + "step": 26041, + "teacher_loss": 0.243296816945076 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.3568854331970215, + "learning_rate": 4.506701407495245e-07, + "loss": 0.2041, + "step": 26042, + "teacher_loss": 0.18707525730133057 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.26589435338974, + "learning_rate": 4.5011775084489837e-07, + "loss": 0.2396, + "step": 26043, + "teacher_loss": 0.23667283356189728 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.16364212334156036, + "learning_rate": 4.495656945254467e-07, + "loss": 0.1627, + "step": 26044, + "teacher_loss": 0.16265049576759338 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.11040471494197845, + "learning_rate": 4.490139718038294e-07, + "loss": 0.1214, + "step": 26045, + "teacher_loss": 0.12262696027755737 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.6463939547538757, + "learning_rate": 4.48462582692698e-07, + "loss": 0.3244, + "step": 26046, + "teacher_loss": 0.2885853052139282 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.45146292448043823, + "learning_rate": 4.479115272046891e-07, + "loss": 0.2308, + "step": 26047, + "teacher_loss": 0.20631471276283264 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.49363085627555847, + "learning_rate": 4.4736080535244084e-07, + "loss": 0.1927, + "step": 26048, + "teacher_loss": 0.1592334806919098 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.4053407907485962, + "learning_rate": 4.468104171485782e-07, + "loss": 0.2322, + "step": 26049, + "teacher_loss": 0.21292461454868317 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.3578922748565674, + "learning_rate": 4.4626036260571937e-07, + "loss": 0.3275, + "step": 26050, + "teacher_loss": 0.32410258054733276 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.2743688225746155, + "learning_rate": 4.457106417364759e-07, + "loss": 0.2011, + "step": 26051, + "teacher_loss": 0.19299355149269104 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.262364000082016, + "learning_rate": 4.451612545534528e-07, + "loss": 0.1854, + "step": 26052, + "teacher_loss": 0.17681050300598145 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.31732919812202454, + "learning_rate": 4.4461220106924494e-07, + "loss": 0.1853, + "step": 26053, + "teacher_loss": 0.17058789730072021 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 1.1371077299118042, + "learning_rate": 4.440634812964373e-07, + "loss": 0.3412, + "step": 26054, + "teacher_loss": 0.25280189514160156 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.5069007873535156, + "learning_rate": 4.4351509524761466e-07, + "loss": 0.3737, + "step": 26055, + "teacher_loss": 0.35888397693634033 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.5399612188339233, + "learning_rate": 4.4296704293534886e-07, + "loss": 0.2601, + "step": 26056, + "teacher_loss": 0.22903326153755188 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.27342677116394043, + "learning_rate": 4.42419324372203e-07, + "loss": 0.2176, + "step": 26057, + "teacher_loss": 0.21142691373825073 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.3868868947029114, + "learning_rate": 4.418719395707355e-07, + "loss": 0.22, + "step": 26058, + "teacher_loss": 0.20146706700325012 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.5153133273124695, + "learning_rate": 4.413248885434995e-07, + "loss": 0.2056, + "step": 26059, + "teacher_loss": 0.17118126153945923 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.48697638511657715, + "learning_rate": 4.4077817130303344e-07, + "loss": 0.2276, + "step": 26060, + "teacher_loss": 0.19880172610282898 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.768515408039093, + "learning_rate": 4.402317878618722e-07, + "loss": 0.2387, + "step": 26061, + "teacher_loss": 0.17979514598846436 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.5323460102081299, + "learning_rate": 4.396857382325459e-07, + "loss": 0.1754, + "step": 26062, + "teacher_loss": 0.13569381833076477 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 1.0058705806732178, + "learning_rate": 4.3914002242756934e-07, + "loss": 0.3528, + "step": 26063, + "teacher_loss": 0.28025805950164795 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.4521084725856781, + "learning_rate": 4.385946404594576e-07, + "loss": 0.2267, + "step": 26064, + "teacher_loss": 0.2017059475183487 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.27968937158584595, + "learning_rate": 4.380495923407124e-07, + "loss": 0.1706, + "step": 26065, + "teacher_loss": 0.1584520936012268 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.45004379749298096, + "learning_rate": 4.3750487808383033e-07, + "loss": 0.2086, + "step": 26066, + "teacher_loss": 0.1817297786474228 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.2527501583099365, + "learning_rate": 4.3696049770130307e-07, + "loss": 0.1943, + "step": 26067, + "teacher_loss": 0.18780942261219025 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.605048656463623, + "learning_rate": 4.364164512056074e-07, + "loss": 0.3141, + "step": 26068, + "teacher_loss": 0.2817923426628113 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.7627030611038208, + "learning_rate": 4.3587273860921985e-07, + "loss": 0.2592, + "step": 26069, + "teacher_loss": 0.20321446657180786 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.18822041153907776, + "learning_rate": 4.353293599246055e-07, + "loss": 0.1578, + "step": 26070, + "teacher_loss": 0.15437328815460205 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.2526842951774597, + "learning_rate": 4.3478631516421954e-07, + "loss": 0.1656, + "step": 26071, + "teacher_loss": 0.15592005848884583 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.34720659255981445, + "learning_rate": 4.342436043405168e-07, + "loss": 0.1882, + "step": 26072, + "teacher_loss": 0.1705521047115326 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.2369542121887207, + "learning_rate": 4.3370122746593575e-07, + "loss": 0.1394, + "step": 26073, + "teacher_loss": 0.12855711579322815 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.3447191119194031, + "learning_rate": 4.331591845529148e-07, + "loss": 0.197, + "step": 26074, + "teacher_loss": 0.18061551451683044 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.2550721764564514, + "learning_rate": 4.3261747561388063e-07, + "loss": 0.2302, + "step": 26075, + "teacher_loss": 0.2274158000946045 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.6777764558792114, + "learning_rate": 4.320761006612517e-07, + "loss": 0.2143, + "step": 26076, + "teacher_loss": 0.16282817721366882 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.42767333984375, + "learning_rate": 4.315350597074413e-07, + "loss": 0.2211, + "step": 26077, + "teacher_loss": 0.19819800555706024 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.7305760383605957, + "learning_rate": 4.30994352764853e-07, + "loss": 0.3295, + "step": 26078, + "teacher_loss": 0.284932017326355 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.9923133850097656, + "learning_rate": 4.304539798458834e-07, + "loss": 0.2509, + "step": 26079, + "teacher_loss": 0.1685442328453064 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.6449244618415833, + "learning_rate": 4.299139409629244e-07, + "loss": 0.293, + "step": 26080, + "teacher_loss": 0.2539238929748535 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.2612766921520233, + "learning_rate": 4.293742361283526e-07, + "loss": 0.1645, + "step": 26081, + "teacher_loss": 0.15378305315971375 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.16080445051193237, + "learning_rate": 4.2883486535454483e-07, + "loss": 0.126, + "step": 26082, + "teacher_loss": 0.12217552214860916 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.3531619608402252, + "learning_rate": 4.282958286538696e-07, + "loss": 0.204, + "step": 26083, + "teacher_loss": 0.1874670386314392 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.6026806831359863, + "learning_rate": 4.2775712603868036e-07, + "loss": 0.2629, + "step": 26084, + "teacher_loss": 0.22519998252391815 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.873207688331604, + "learning_rate": 4.2721875752133044e-07, + "loss": 0.3177, + "step": 26085, + "teacher_loss": 0.25598400831222534 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.6222789883613586, + "learning_rate": 4.2668072311416504e-07, + "loss": 0.2591, + "step": 26086, + "teacher_loss": 0.21873164176940918 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.6674565076828003, + "learning_rate": 4.2614302282951766e-07, + "loss": 0.3274, + "step": 26087, + "teacher_loss": 0.28959396481513977 + }, + { + "compression_loss": 0.0, + "epoch": 4.71, + "label_loss": 0.3951818346977234, + "learning_rate": 4.2560565667971173e-07, + "loss": 0.2112, + "step": 26088, + "teacher_loss": 0.19078782200813293 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.43137240409851074, + "learning_rate": 4.2506862467707575e-07, + "loss": 0.2823, + "step": 26089, + "teacher_loss": 0.26569920778274536 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.21788080036640167, + "learning_rate": 4.245319268339198e-07, + "loss": 0.2099, + "step": 26090, + "teacher_loss": 0.20898719131946564 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.42294490337371826, + "learning_rate": 4.2399556316254254e-07, + "loss": 0.2089, + "step": 26091, + "teacher_loss": 0.1850709468126297 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.11135952174663544, + "learning_rate": 4.234595336752489e-07, + "loss": 0.1277, + "step": 26092, + "teacher_loss": 0.12953317165374756 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.5950625538825989, + "learning_rate": 4.2292383838432593e-07, + "loss": 0.2582, + "step": 26093, + "teacher_loss": 0.2207736223936081 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.6008852124214172, + "learning_rate": 4.22388477302052e-07, + "loss": 0.2014, + "step": 26094, + "teacher_loss": 0.1570214331150055 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.6806545257568359, + "learning_rate": 4.218534504407057e-07, + "loss": 0.3025, + "step": 26095, + "teacher_loss": 0.2604466676712036 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.6116722226142883, + "learning_rate": 4.213187578125538e-07, + "loss": 0.2604, + "step": 26096, + "teacher_loss": 0.22138960659503937 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.28433072566986084, + "learning_rate": 4.207843994298516e-07, + "loss": 0.2057, + "step": 26097, + "teacher_loss": 0.19693398475646973 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.401202917098999, + "learning_rate": 4.2025037530485256e-07, + "loss": 0.2275, + "step": 26098, + "teacher_loss": 0.20817461609840393 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.3467712700366974, + "learning_rate": 4.197166854498019e-07, + "loss": 0.163, + "step": 26099, + "teacher_loss": 0.14263172447681427 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.2930585741996765, + "learning_rate": 4.191833298769332e-07, + "loss": 0.1743, + "step": 26100, + "teacher_loss": 0.16114750504493713 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.38431665301322937, + "learning_rate": 4.186503085984733e-07, + "loss": 0.2414, + "step": 26101, + "teacher_loss": 0.22548821568489075 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.14645330607891083, + "learning_rate": 4.181176216266458e-07, + "loss": 0.1307, + "step": 26102, + "teacher_loss": 0.1289098858833313 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.14754296839237213, + "learning_rate": 4.175852689736642e-07, + "loss": 0.1626, + "step": 26103, + "teacher_loss": 0.16422373056411743 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 1.0099220275878906, + "learning_rate": 4.1705325065172883e-07, + "loss": 0.2714, + "step": 26104, + "teacher_loss": 0.18939325213432312 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.6340831518173218, + "learning_rate": 4.165215666730415e-07, + "loss": 0.2488, + "step": 26105, + "teacher_loss": 0.20595046877861023 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.41640201210975647, + "learning_rate": 4.1599021704979257e-07, + "loss": 0.1957, + "step": 26106, + "teacher_loss": 0.1711452603340149 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.565737247467041, + "learning_rate": 4.154592017941622e-07, + "loss": 0.2176, + "step": 26107, + "teacher_loss": 0.1788749396800995 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.2760319709777832, + "learning_rate": 4.14928520918324e-07, + "loss": 0.2199, + "step": 26108, + "teacher_loss": 0.21361616253852844 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.25048866868019104, + "learning_rate": 4.143981744344483e-07, + "loss": 0.165, + "step": 26109, + "teacher_loss": 0.15544962882995605 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.41958439350128174, + "learning_rate": 4.138681623546919e-07, + "loss": 0.1934, + "step": 26110, + "teacher_loss": 0.16822870075702667 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.33993959426879883, + "learning_rate": 4.133384846912069e-07, + "loss": 0.1954, + "step": 26111, + "teacher_loss": 0.17929381132125854 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.35421645641326904, + "learning_rate": 4.128091414561386e-07, + "loss": 0.2196, + "step": 26112, + "teacher_loss": 0.2045917510986328 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.624624490737915, + "learning_rate": 4.1228013266162044e-07, + "loss": 0.2884, + "step": 26113, + "teacher_loss": 0.2510247230529785 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.6498924493789673, + "learning_rate": 4.1175145831978454e-07, + "loss": 0.3514, + "step": 26114, + "teacher_loss": 0.3182254433631897 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.24432280659675598, + "learning_rate": 4.1122311844274786e-07, + "loss": 0.1871, + "step": 26115, + "teacher_loss": 0.18077883124351501 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.3105872869491577, + "learning_rate": 4.106951130426273e-07, + "loss": 0.2795, + "step": 26116, + "teacher_loss": 0.2759951055049896 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.6618967056274414, + "learning_rate": 4.101674421315249e-07, + "loss": 0.2313, + "step": 26117, + "teacher_loss": 0.1834862232208252 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.460035502910614, + "learning_rate": 4.0964010572154096e-07, + "loss": 0.2017, + "step": 26118, + "teacher_loss": 0.17295563220977783 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.4694027304649353, + "learning_rate": 4.0911310382476754e-07, + "loss": 0.2766, + "step": 26119, + "teacher_loss": 0.2551938593387604 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.2026854008436203, + "learning_rate": 4.0858643645328155e-07, + "loss": 0.158, + "step": 26120, + "teacher_loss": 0.15297973155975342 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.428189754486084, + "learning_rate": 4.080601036191617e-07, + "loss": 0.3135, + "step": 26121, + "teacher_loss": 0.3007040023803711 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.3920612335205078, + "learning_rate": 4.0753410533447665e-07, + "loss": 0.2, + "step": 26122, + "teacher_loss": 0.1787053346633911 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.12669916450977325, + "learning_rate": 4.0700844161128345e-07, + "loss": 0.1383, + "step": 26123, + "teacher_loss": 0.13960862159729004 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.8708757162094116, + "learning_rate": 4.0648311246163416e-07, + "loss": 0.2849, + "step": 26124, + "teacher_loss": 0.21980786323547363 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.8162716627120972, + "learning_rate": 4.059581178975741e-07, + "loss": 0.279, + "step": 26125, + "teacher_loss": 0.21927893161773682 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.6057848930358887, + "learning_rate": 4.054334579311386e-07, + "loss": 0.3848, + "step": 26126, + "teacher_loss": 0.3601934611797333 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.4929184317588806, + "learning_rate": 4.0490913257435813e-07, + "loss": 0.2869, + "step": 26127, + "teacher_loss": 0.26400449872016907 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.6739277243614197, + "learning_rate": 4.0438514183925135e-07, + "loss": 0.2495, + "step": 26128, + "teacher_loss": 0.20239630341529846 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.42070162296295166, + "learning_rate": 4.038614857378337e-07, + "loss": 0.225, + "step": 26129, + "teacher_loss": 0.20326349139213562 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.31354451179504395, + "learning_rate": 4.033381642821121e-07, + "loss": 0.182, + "step": 26130, + "teacher_loss": 0.1674191653728485 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.40662646293640137, + "learning_rate": 4.0281517748408217e-07, + "loss": 0.2133, + "step": 26131, + "teacher_loss": 0.19178417325019836 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.4946771562099457, + "learning_rate": 4.0229252535573413e-07, + "loss": 0.2249, + "step": 26132, + "teacher_loss": 0.19493868947029114 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.5274799466133118, + "learning_rate": 4.017702079090552e-07, + "loss": 0.1977, + "step": 26133, + "teacher_loss": 0.16105802357196808 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.3925546109676361, + "learning_rate": 4.012482251560157e-07, + "loss": 0.1737, + "step": 26134, + "teacher_loss": 0.14933812618255615 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.41620999574661255, + "learning_rate": 4.0072657710858776e-07, + "loss": 0.2483, + "step": 26135, + "teacher_loss": 0.22965747117996216 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.41546422243118286, + "learning_rate": 4.002052637787251e-07, + "loss": 0.1976, + "step": 26136, + "teacher_loss": 0.17342418432235718 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.4069422483444214, + "learning_rate": 3.9968428517838495e-07, + "loss": 0.1785, + "step": 26137, + "teacher_loss": 0.15311656892299652 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.3098655939102173, + "learning_rate": 3.991636413195093e-07, + "loss": 0.2085, + "step": 26138, + "teacher_loss": 0.19723233580589294 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.28139638900756836, + "learning_rate": 3.98643332214037e-07, + "loss": 0.2009, + "step": 26139, + "teacher_loss": 0.19198496639728546 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.2162252813577652, + "learning_rate": 3.981233578738952e-07, + "loss": 0.2662, + "step": 26140, + "teacher_loss": 0.2717888355255127 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.255289226770401, + "learning_rate": 3.9760371831100594e-07, + "loss": 0.2187, + "step": 26141, + "teacher_loss": 0.21460360288619995 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.4985791742801666, + "learning_rate": 3.970844135372831e-07, + "loss": 0.186, + "step": 26142, + "teacher_loss": 0.1512521356344223 + }, + { + "compression_loss": 0.0, + "epoch": 4.72, + "label_loss": 0.7005516290664673, + "learning_rate": 3.9656544356463375e-07, + "loss": 0.2162, + "step": 26143, + "teacher_loss": 0.1623440384864807 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 1.2586867809295654, + "learning_rate": 3.9604680840495345e-07, + "loss": 0.3848, + "step": 26144, + "teacher_loss": 0.2877376079559326 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.18884801864624023, + "learning_rate": 3.9552850807013595e-07, + "loss": 0.1459, + "step": 26145, + "teacher_loss": 0.14108259975910187 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.2994023859500885, + "learning_rate": 3.9501054257206514e-07, + "loss": 0.3531, + "step": 26146, + "teacher_loss": 0.35908254981040955 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.5666463375091553, + "learning_rate": 3.9449291192261317e-07, + "loss": 0.3425, + "step": 26147, + "teacher_loss": 0.31757134199142456 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.6052114963531494, + "learning_rate": 3.939756161336472e-07, + "loss": 0.272, + "step": 26148, + "teacher_loss": 0.23494423925876617 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.17341214418411255, + "learning_rate": 3.9345865521703273e-07, + "loss": 0.2166, + "step": 26149, + "teacher_loss": 0.22141587734222412 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.3797691762447357, + "learning_rate": 3.9294202918461694e-07, + "loss": 0.2345, + "step": 26150, + "teacher_loss": 0.21834754943847656 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.7920696139335632, + "learning_rate": 3.9242573804824543e-07, + "loss": 0.3129, + "step": 26151, + "teacher_loss": 0.2597135305404663 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.48059970140457153, + "learning_rate": 3.91909781819757e-07, + "loss": 0.1888, + "step": 26152, + "teacher_loss": 0.15642619132995605 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.4991934299468994, + "learning_rate": 3.9139416051098053e-07, + "loss": 0.2204, + "step": 26153, + "teacher_loss": 0.18938395380973816 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.6187795996665955, + "learning_rate": 3.908788741337349e-07, + "loss": 0.3121, + "step": 26154, + "teacher_loss": 0.27805233001708984 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.3074015974998474, + "learning_rate": 3.9036392269983736e-07, + "loss": 0.2087, + "step": 26155, + "teacher_loss": 0.19772422313690186 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.7527005076408386, + "learning_rate": 3.8984930622109507e-07, + "loss": 0.2548, + "step": 26156, + "teacher_loss": 0.19952982664108276 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.17522814869880676, + "learning_rate": 3.893350247093019e-07, + "loss": 0.2196, + "step": 26157, + "teacher_loss": 0.22454112768173218 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.31965523958206177, + "learning_rate": 3.888210781762519e-07, + "loss": 0.3283, + "step": 26158, + "teacher_loss": 0.3292592167854309 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.5545949339866638, + "learning_rate": 3.8830746663372886e-07, + "loss": 0.2564, + "step": 26159, + "teacher_loss": 0.2232506275177002 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.35756444931030273, + "learning_rate": 3.8779419009350837e-07, + "loss": 0.1973, + "step": 26160, + "teacher_loss": 0.17946264147758484 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.5310564041137695, + "learning_rate": 3.8728124856735435e-07, + "loss": 0.2763, + "step": 26161, + "teacher_loss": 0.24804642796516418 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.19012349843978882, + "learning_rate": 3.867686420670341e-07, + "loss": 0.1359, + "step": 26162, + "teacher_loss": 0.12989000976085663 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.26397842168807983, + "learning_rate": 3.862563706042949e-07, + "loss": 0.1557, + "step": 26163, + "teacher_loss": 0.1436896026134491 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.731393575668335, + "learning_rate": 3.8574443419088057e-07, + "loss": 0.2783, + "step": 26164, + "teacher_loss": 0.22794075310230255 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.11459334194660187, + "learning_rate": 3.852328328385318e-07, + "loss": 0.1372, + "step": 26165, + "teacher_loss": 0.13972651958465576 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.3915087580680847, + "learning_rate": 3.8472156655897926e-07, + "loss": 0.1979, + "step": 26166, + "teacher_loss": 0.17642842233181 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.25729283690452576, + "learning_rate": 3.842106353639385e-07, + "loss": 0.2336, + "step": 26167, + "teacher_loss": 0.230980783700943 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.4644719958305359, + "learning_rate": 3.837000392651285e-07, + "loss": 0.2421, + "step": 26168, + "teacher_loss": 0.21739768981933594 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.19753041863441467, + "learning_rate": 3.831897782742566e-07, + "loss": 0.1821, + "step": 26169, + "teacher_loss": 0.1803959608078003 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.9388679265975952, + "learning_rate": 3.826798524030184e-07, + "loss": 0.3157, + "step": 26170, + "teacher_loss": 0.24650216102600098 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.6640670299530029, + "learning_rate": 3.8217026166310454e-07, + "loss": 0.24, + "step": 26171, + "teacher_loss": 0.19287711381912231 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.48650598526000977, + "learning_rate": 3.8166100606620234e-07, + "loss": 0.2109, + "step": 26172, + "teacher_loss": 0.18028786778450012 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.28243106603622437, + "learning_rate": 3.8115208562398417e-07, + "loss": 0.3322, + "step": 26173, + "teacher_loss": 0.33770421147346497 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.5037869215011597, + "learning_rate": 3.80643500348119e-07, + "loss": 0.18, + "step": 26174, + "teacher_loss": 0.14400310814380646 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.33066973090171814, + "learning_rate": 3.8013525025026916e-07, + "loss": 0.1794, + "step": 26175, + "teacher_loss": 0.16258709132671356 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.2360270917415619, + "learning_rate": 3.7962733534208536e-07, + "loss": 0.1916, + "step": 26176, + "teacher_loss": 0.18664349615573883 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.21871665120124817, + "learning_rate": 3.7911975563520995e-07, + "loss": 0.2018, + "step": 26177, + "teacher_loss": 0.1999211609363556 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.3610538840293884, + "learning_rate": 3.786125111412836e-07, + "loss": 0.2254, + "step": 26178, + "teacher_loss": 0.21032965183258057 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.23414433002471924, + "learning_rate": 3.78105601871937e-07, + "loss": 0.2504, + "step": 26179, + "teacher_loss": 0.25220608711242676 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.5131825804710388, + "learning_rate": 3.775990278387875e-07, + "loss": 0.1804, + "step": 26180, + "teacher_loss": 0.1434488743543625 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.27662405371665955, + "learning_rate": 3.7709278905345255e-07, + "loss": 0.1833, + "step": 26181, + "teacher_loss": 0.17290785908699036 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.23825427889823914, + "learning_rate": 3.7658688552754116e-07, + "loss": 0.1539, + "step": 26182, + "teacher_loss": 0.14451012015342712 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.1817144751548767, + "learning_rate": 3.760813172726457e-07, + "loss": 0.1536, + "step": 26183, + "teacher_loss": 0.15052466094493866 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.406525194644928, + "learning_rate": 3.755760843003619e-07, + "loss": 0.2731, + "step": 26184, + "teacher_loss": 0.2582969665527344 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.2960628569126129, + "learning_rate": 3.750711866222739e-07, + "loss": 0.1623, + "step": 26185, + "teacher_loss": 0.14741212129592896 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.19659802317619324, + "learning_rate": 3.74566624249954e-07, + "loss": 0.1973, + "step": 26186, + "teacher_loss": 0.19743013381958008 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.46065518260002136, + "learning_rate": 3.74062397194973e-07, + "loss": 0.2236, + "step": 26187, + "teacher_loss": 0.1972934901714325 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.1878177523612976, + "learning_rate": 3.7355850546889e-07, + "loss": 0.1964, + "step": 26188, + "teacher_loss": 0.19739489257335663 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.2511180639266968, + "learning_rate": 3.7305494908325744e-07, + "loss": 0.1808, + "step": 26189, + "teacher_loss": 0.17302405834197998 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.2679293155670166, + "learning_rate": 3.725517280496227e-07, + "loss": 0.2293, + "step": 26190, + "teacher_loss": 0.22504764795303345 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.2622528374195099, + "learning_rate": 3.7204884237951987e-07, + "loss": 0.1406, + "step": 26191, + "teacher_loss": 0.127126544713974 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.47975707054138184, + "learning_rate": 3.7154629208447974e-07, + "loss": 0.2444, + "step": 26192, + "teacher_loss": 0.21826599538326263 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.12180589139461517, + "learning_rate": 3.7104407717602815e-07, + "loss": 0.1292, + "step": 26193, + "teacher_loss": 0.13007289171218872 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.8054293990135193, + "learning_rate": 3.7054219766567256e-07, + "loss": 0.3094, + "step": 26194, + "teacher_loss": 0.2542540431022644 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.564100444316864, + "learning_rate": 3.700406535649237e-07, + "loss": 0.2136, + "step": 26195, + "teacher_loss": 0.1746487021446228 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.4617871642112732, + "learning_rate": 3.695394448852807e-07, + "loss": 0.1909, + "step": 26196, + "teacher_loss": 0.1607992947101593 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.27024996280670166, + "learning_rate": 3.6903857163823274e-07, + "loss": 0.211, + "step": 26197, + "teacher_loss": 0.20439936220645905 + }, + { + "compression_loss": 0.0, + "epoch": 4.73, + "label_loss": 0.11742550134658813, + "learning_rate": 3.6853803383526565e-07, + "loss": 0.1543, + "step": 26198, + "teacher_loss": 0.15844564139842987 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.40123188495635986, + "learning_rate": 3.680378314878535e-07, + "loss": 0.2098, + "step": 26199, + "teacher_loss": 0.188495472073555 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.6872580051422119, + "learning_rate": 3.675379646074656e-07, + "loss": 0.2847, + "step": 26200, + "teacher_loss": 0.23996597528457642 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.6345182657241821, + "learning_rate": 3.67038433205561e-07, + "loss": 0.2273, + "step": 26201, + "teacher_loss": 0.1820010393857956 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.39220041036605835, + "learning_rate": 3.665392372935922e-07, + "loss": 0.3509, + "step": 26202, + "teacher_loss": 0.3463330864906311 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.112884022295475, + "learning_rate": 3.6604037688300676e-07, + "loss": 0.1651, + "step": 26203, + "teacher_loss": 0.1709403693675995 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.38889506459236145, + "learning_rate": 3.655418519852405e-07, + "loss": 0.2284, + "step": 26204, + "teacher_loss": 0.2105635404586792 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.33332061767578125, + "learning_rate": 3.6504366261172263e-07, + "loss": 0.2127, + "step": 26205, + "teacher_loss": 0.1992582380771637 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.5565102100372314, + "learning_rate": 3.6454580877387567e-07, + "loss": 0.2361, + "step": 26206, + "teacher_loss": 0.20051544904708862 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.16748939454555511, + "learning_rate": 3.6404829048311384e-07, + "loss": 0.1792, + "step": 26207, + "teacher_loss": 0.18048053979873657 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.3263118863105774, + "learning_rate": 3.6355110775084467e-07, + "loss": 0.1656, + "step": 26208, + "teacher_loss": 0.14771801233291626 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.3977610170841217, + "learning_rate": 3.630542605884657e-07, + "loss": 0.1966, + "step": 26209, + "teacher_loss": 0.17423516511917114 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.14937418699264526, + "learning_rate": 3.625577490073695e-07, + "loss": 0.1239, + "step": 26210, + "teacher_loss": 0.12106631696224213 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.3014680743217468, + "learning_rate": 3.6206157301893695e-07, + "loss": 0.1704, + "step": 26211, + "teacher_loss": 0.1558433622121811 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.5079413652420044, + "learning_rate": 3.6156573263454727e-07, + "loss": 0.2183, + "step": 26212, + "teacher_loss": 0.1861562728881836 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.46815937757492065, + "learning_rate": 3.610702278655681e-07, + "loss": 0.1905, + "step": 26213, + "teacher_loss": 0.15967296063899994 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.30157017707824707, + "learning_rate": 3.6057505872335527e-07, + "loss": 0.2238, + "step": 26214, + "teacher_loss": 0.21517033874988556 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.4151687026023865, + "learning_rate": 3.600802252192681e-07, + "loss": 0.2399, + "step": 26215, + "teacher_loss": 0.22041943669319153 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.7945215702056885, + "learning_rate": 3.5958572736464913e-07, + "loss": 0.2648, + "step": 26216, + "teacher_loss": 0.205949604511261 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.42355990409851074, + "learning_rate": 3.5909156517083266e-07, + "loss": 0.2111, + "step": 26217, + "teacher_loss": 0.18747074902057648 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.2900615930557251, + "learning_rate": 3.585977386491512e-07, + "loss": 0.1492, + "step": 26218, + "teacher_loss": 0.1335279792547226 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.27687835693359375, + "learning_rate": 3.5810424781092754e-07, + "loss": 0.1833, + "step": 26219, + "teacher_loss": 0.17291682958602905 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.5882298946380615, + "learning_rate": 3.5761109266747417e-07, + "loss": 0.2302, + "step": 26220, + "teacher_loss": 0.1904669553041458 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.5582160949707031, + "learning_rate": 3.571182732300987e-07, + "loss": 0.2034, + "step": 26221, + "teacher_loss": 0.1639569252729416 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.5095123052597046, + "learning_rate": 3.566257895101005e-07, + "loss": 0.2756, + "step": 26222, + "teacher_loss": 0.2496471405029297 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.4840032458305359, + "learning_rate": 3.561336415187688e-07, + "loss": 0.194, + "step": 26223, + "teacher_loss": 0.16177856922149658 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.2597351670265198, + "learning_rate": 3.556418292673863e-07, + "loss": 0.1968, + "step": 26224, + "teacher_loss": 0.18977373838424683 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.43789416551589966, + "learning_rate": 3.55150352767234e-07, + "loss": 0.2061, + "step": 26225, + "teacher_loss": 0.1803433895111084 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.3409907817840576, + "learning_rate": 3.546592120295744e-07, + "loss": 0.147, + "step": 26226, + "teacher_loss": 0.12547388672828674 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.6100951433181763, + "learning_rate": 3.5416840706567035e-07, + "loss": 0.3121, + "step": 26227, + "teacher_loss": 0.27901214361190796 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.35740572214126587, + "learning_rate": 3.5367793788677437e-07, + "loss": 0.1897, + "step": 26228, + "teacher_loss": 0.1710374504327774 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 1.332581877708435, + "learning_rate": 3.5318780450413255e-07, + "loss": 0.4067, + "step": 26229, + "teacher_loss": 0.3038719594478607 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.4084565043449402, + "learning_rate": 3.5269800692897925e-07, + "loss": 0.2451, + "step": 26230, + "teacher_loss": 0.2269318401813507 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.24748177826404572, + "learning_rate": 3.522085451725454e-07, + "loss": 0.242, + "step": 26231, + "teacher_loss": 0.24144527316093445 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.45086929202079773, + "learning_rate": 3.5171941924605543e-07, + "loss": 0.2021, + "step": 26232, + "teacher_loss": 0.17442019283771515 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.36637169122695923, + "learning_rate": 3.5123062916072033e-07, + "loss": 0.1946, + "step": 26233, + "teacher_loss": 0.17552852630615234 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.6955926418304443, + "learning_rate": 3.5074217492774616e-07, + "loss": 0.2219, + "step": 26234, + "teacher_loss": 0.1692207157611847 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.6517864465713501, + "learning_rate": 3.5025405655833564e-07, + "loss": 0.2825, + "step": 26235, + "teacher_loss": 0.24144214391708374 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.4459074139595032, + "learning_rate": 3.4976627406367477e-07, + "loss": 0.2516, + "step": 26236, + "teacher_loss": 0.2300581932067871 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.29085665941238403, + "learning_rate": 3.49278827454953e-07, + "loss": 0.2075, + "step": 26237, + "teacher_loss": 0.1982668936252594 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.4197015166282654, + "learning_rate": 3.4879171674333967e-07, + "loss": 0.1982, + "step": 26238, + "teacher_loss": 0.17358699440956116 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.20697128772735596, + "learning_rate": 3.483049419400075e-07, + "loss": 0.1879, + "step": 26239, + "teacher_loss": 0.18579816818237305 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.5080598592758179, + "learning_rate": 3.478185030561126e-07, + "loss": 0.2192, + "step": 26240, + "teacher_loss": 0.18715938925743103 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.16831433773040771, + "learning_rate": 3.473324001028111e-07, + "loss": 0.1319, + "step": 26241, + "teacher_loss": 0.12785357236862183 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.2480318248271942, + "learning_rate": 3.468466330912473e-07, + "loss": 0.2338, + "step": 26242, + "teacher_loss": 0.2322331815958023 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.5446599721908569, + "learning_rate": 3.463612020325574e-07, + "loss": 0.169, + "step": 26243, + "teacher_loss": 0.12728184461593628 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.42143183946609497, + "learning_rate": 3.4587610693787075e-07, + "loss": 0.1947, + "step": 26244, + "teacher_loss": 0.1694851815700531 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.4771069586277008, + "learning_rate": 3.453913478183102e-07, + "loss": 0.212, + "step": 26245, + "teacher_loss": 0.18257340788841248 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.2887862026691437, + "learning_rate": 3.449069246849901e-07, + "loss": 0.1565, + "step": 26246, + "teacher_loss": 0.1417803019285202 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.2929953336715698, + "learning_rate": 3.444228375490133e-07, + "loss": 0.2148, + "step": 26247, + "teacher_loss": 0.20610585808753967 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.21139714121818542, + "learning_rate": 3.439390864214842e-07, + "loss": 0.1808, + "step": 26248, + "teacher_loss": 0.17742875218391418 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.39774900674819946, + "learning_rate": 3.4345567131348896e-07, + "loss": 0.1762, + "step": 26249, + "teacher_loss": 0.1515520066022873 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.4918766915798187, + "learning_rate": 3.4297259223611376e-07, + "loss": 0.2063, + "step": 26250, + "teacher_loss": 0.17459869384765625 + }, + { + "epoch": 4.74, + "eval_exact_match": 80.69063386944181, + "eval_f1": 87.91655648696916, + "step": 26250 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.4254281520843506, + "learning_rate": 3.4248984920043137e-07, + "loss": 0.1926, + "step": 26251, + "teacher_loss": 0.16677838563919067 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.9359781742095947, + "learning_rate": 3.4200744221751125e-07, + "loss": 0.2905, + "step": 26252, + "teacher_loss": 0.21872839331626892 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.4856973886489868, + "learning_rate": 3.415253712984162e-07, + "loss": 0.1758, + "step": 26253, + "teacher_loss": 0.14137227833271027 + }, + { + "compression_loss": 0.0, + "epoch": 4.74, + "label_loss": 0.9807087779045105, + "learning_rate": 3.4104363645419246e-07, + "loss": 0.3067, + "step": 26254, + "teacher_loss": 0.2318429946899414 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.18705926835536957, + "learning_rate": 3.4056223769588944e-07, + "loss": 0.181, + "step": 26255, + "teacher_loss": 0.1802825629711151 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.36990833282470703, + "learning_rate": 3.4008117503454506e-07, + "loss": 0.2258, + "step": 26256, + "teacher_loss": 0.20978787541389465 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.44578301906585693, + "learning_rate": 3.396004484811838e-07, + "loss": 0.1968, + "step": 26257, + "teacher_loss": 0.16919000446796417 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.2843993902206421, + "learning_rate": 3.391200580468318e-07, + "loss": 0.1589, + "step": 26258, + "teacher_loss": 0.1449570506811142 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.5973250865936279, + "learning_rate": 3.386400037425019e-07, + "loss": 0.2229, + "step": 26259, + "teacher_loss": 0.18125376105308533 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.2596374452114105, + "learning_rate": 3.381602855791988e-07, + "loss": 0.1482, + "step": 26260, + "teacher_loss": 0.13577735424041748 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 1.0355414152145386, + "learning_rate": 3.376809035679218e-07, + "loss": 0.219, + "step": 26261, + "teacher_loss": 0.1282472312450409 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.4196634292602539, + "learning_rate": 3.372018577196606e-07, + "loss": 0.2368, + "step": 26262, + "teacher_loss": 0.21652746200561523 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.4443909525871277, + "learning_rate": 3.367231480454014e-07, + "loss": 0.2303, + "step": 26263, + "teacher_loss": 0.20652136206626892 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.3333846628665924, + "learning_rate": 3.36244774556117e-07, + "loss": 0.2416, + "step": 26264, + "teacher_loss": 0.23137059807777405 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.7903521060943604, + "learning_rate": 3.357667372627754e-07, + "loss": 0.2566, + "step": 26265, + "teacher_loss": 0.19729208946228027 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.2967739999294281, + "learning_rate": 3.352890361763378e-07, + "loss": 0.2082, + "step": 26266, + "teacher_loss": 0.19836321473121643 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.4878973960876465, + "learning_rate": 3.348116713077537e-07, + "loss": 0.2785, + "step": 26267, + "teacher_loss": 0.2552553713321686 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.6344353556632996, + "learning_rate": 3.3433464266796945e-07, + "loss": 0.2351, + "step": 26268, + "teacher_loss": 0.19077935814857483 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.16268765926361084, + "learning_rate": 3.3385795026792454e-07, + "loss": 0.1362, + "step": 26269, + "teacher_loss": 0.1332828551530838 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.4555031657218933, + "learning_rate": 3.3338159411854363e-07, + "loss": 0.1983, + "step": 26270, + "teacher_loss": 0.16968286037445068 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.5620983839035034, + "learning_rate": 3.3290557423074964e-07, + "loss": 0.1706, + "step": 26271, + "teacher_loss": 0.1271371841430664 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.6019840240478516, + "learning_rate": 3.3242989061545715e-07, + "loss": 0.3218, + "step": 26272, + "teacher_loss": 0.290721595287323 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.42375150322914124, + "learning_rate": 3.3195454328357246e-07, + "loss": 0.1991, + "step": 26273, + "teacher_loss": 0.17414425313472748 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.5715345144271851, + "learning_rate": 3.3147953224599017e-07, + "loss": 0.2348, + "step": 26274, + "teacher_loss": 0.19743695855140686 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.4060556888580322, + "learning_rate": 3.3100485751360656e-07, + "loss": 0.2333, + "step": 26275, + "teacher_loss": 0.2141450047492981 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.7044281363487244, + "learning_rate": 3.3053051909730123e-07, + "loss": 0.2395, + "step": 26276, + "teacher_loss": 0.1878378540277481 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.19606342911720276, + "learning_rate": 3.300565170079489e-07, + "loss": 0.2074, + "step": 26277, + "teacher_loss": 0.2086627334356308 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.48753833770751953, + "learning_rate": 3.295828512564175e-07, + "loss": 0.1909, + "step": 26278, + "teacher_loss": 0.15795306861400604 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.16648554801940918, + "learning_rate": 3.2910952185357e-07, + "loss": 0.2165, + "step": 26279, + "teacher_loss": 0.22205622494220734 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.1778557002544403, + "learning_rate": 3.2863652881025273e-07, + "loss": 0.1175, + "step": 26280, + "teacher_loss": 0.11084824055433273 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.3704199194908142, + "learning_rate": 3.2816387213731205e-07, + "loss": 0.1731, + "step": 26281, + "teacher_loss": 0.1512129008769989 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.5568274855613708, + "learning_rate": 3.2769155184558754e-07, + "loss": 0.2153, + "step": 26282, + "teacher_loss": 0.17738838493824005 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.3030836284160614, + "learning_rate": 3.272195679459072e-07, + "loss": 0.2037, + "step": 26283, + "teacher_loss": 0.1926092952489853 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 1.03810715675354, + "learning_rate": 3.2674792044908587e-07, + "loss": 0.327, + "step": 26284, + "teacher_loss": 0.24797660112380981 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.5469939708709717, + "learning_rate": 3.262766093659464e-07, + "loss": 0.2262, + "step": 26285, + "teacher_loss": 0.19051909446716309 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.36931300163269043, + "learning_rate": 3.258056347072902e-07, + "loss": 0.1747, + "step": 26286, + "teacher_loss": 0.1530396193265915 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.6724317073822021, + "learning_rate": 3.2533499648391027e-07, + "loss": 0.3356, + "step": 26287, + "teacher_loss": 0.29819542169570923 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.30945494771003723, + "learning_rate": 3.248646947066064e-07, + "loss": 0.141, + "step": 26288, + "teacher_loss": 0.12223626673221588 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.3482980728149414, + "learning_rate": 3.243947293861582e-07, + "loss": 0.1832, + "step": 26289, + "teacher_loss": 0.16480231285095215 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.603256344795227, + "learning_rate": 3.2392510053333544e-07, + "loss": 0.2733, + "step": 26290, + "teacher_loss": 0.23659095168113708 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.41624385118484497, + "learning_rate": 3.234558081589095e-07, + "loss": 0.2476, + "step": 26291, + "teacher_loss": 0.22891342639923096 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.3951801061630249, + "learning_rate": 3.229868522736418e-07, + "loss": 0.1865, + "step": 26292, + "teacher_loss": 0.16329112648963928 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.5902349948883057, + "learning_rate": 3.2251823288827863e-07, + "loss": 0.2538, + "step": 26293, + "teacher_loss": 0.2164284586906433 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.3152114748954773, + "learning_rate": 3.220499500135682e-07, + "loss": 0.1829, + "step": 26294, + "teacher_loss": 0.16816270351409912 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.613085925579071, + "learning_rate": 3.2158200366024684e-07, + "loss": 0.2025, + "step": 26295, + "teacher_loss": 0.1568540334701538 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.5086848735809326, + "learning_rate": 3.211143938390393e-07, + "loss": 0.23, + "step": 26296, + "teacher_loss": 0.19899138808250427 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.5446643829345703, + "learning_rate": 3.2064712056067036e-07, + "loss": 0.2495, + "step": 26297, + "teacher_loss": 0.21670971810817719 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.32474637031555176, + "learning_rate": 3.2018018383585315e-07, + "loss": 0.2054, + "step": 26298, + "teacher_loss": 0.1921561360359192 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.39813587069511414, + "learning_rate": 3.197135836752907e-07, + "loss": 0.2352, + "step": 26299, + "teacher_loss": 0.21707241237163544 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.45196977257728577, + "learning_rate": 3.192473200896828e-07, + "loss": 0.1938, + "step": 26300, + "teacher_loss": 0.1651480793952942 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.5202337503433228, + "learning_rate": 3.1878139308971765e-07, + "loss": 0.1896, + "step": 26301, + "teacher_loss": 0.15282979607582092 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.3252509832382202, + "learning_rate": 3.183158026860816e-07, + "loss": 0.2581, + "step": 26302, + "teacher_loss": 0.25064074993133545 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.43483084440231323, + "learning_rate": 3.1785054888944286e-07, + "loss": 0.2261, + "step": 26303, + "teacher_loss": 0.20291420817375183 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.38876041769981384, + "learning_rate": 3.173856317104712e-07, + "loss": 0.1841, + "step": 26304, + "teacher_loss": 0.16133934259414673 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.42302560806274414, + "learning_rate": 3.169210511598297e-07, + "loss": 0.2019, + "step": 26305, + "teacher_loss": 0.17732861638069153 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.5700348615646362, + "learning_rate": 3.1645680724816326e-07, + "loss": 0.2735, + "step": 26306, + "teacher_loss": 0.24056124687194824 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.8543387055397034, + "learning_rate": 3.1599289998611834e-07, + "loss": 0.3369, + "step": 26307, + "teacher_loss": 0.27941831946372986 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.37396278977394104, + "learning_rate": 3.1552932938433476e-07, + "loss": 0.2127, + "step": 26308, + "teacher_loss": 0.1947641372680664 + }, + { + "compression_loss": 0.0, + "epoch": 4.75, + "label_loss": 0.40257367491722107, + "learning_rate": 3.15066095453434e-07, + "loss": 0.2388, + "step": 26309, + "teacher_loss": 0.220563143491745 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.4022144675254822, + "learning_rate": 3.146031982040426e-07, + "loss": 0.2384, + "step": 26310, + "teacher_loss": 0.22024913132190704 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.23328836262226105, + "learning_rate": 3.141406376467687e-07, + "loss": 0.1356, + "step": 26311, + "teacher_loss": 0.12469995766878128 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.3091126084327698, + "learning_rate": 3.1367841379221885e-07, + "loss": 0.2096, + "step": 26312, + "teacher_loss": 0.19856882095336914 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.6228529214859009, + "learning_rate": 3.132165266509945e-07, + "loss": 0.325, + "step": 26313, + "teacher_loss": 0.29187560081481934 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.2866888642311096, + "learning_rate": 3.12754976233679e-07, + "loss": 0.1454, + "step": 26314, + "teacher_loss": 0.12974685430526733 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.8244979381561279, + "learning_rate": 3.1229376255085707e-07, + "loss": 0.2211, + "step": 26315, + "teacher_loss": 0.15406744182109833 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.41625070571899414, + "learning_rate": 3.118328856131053e-07, + "loss": 0.2463, + "step": 26316, + "teacher_loss": 0.22736340761184692 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.28182893991470337, + "learning_rate": 3.1137234543098524e-07, + "loss": 0.1761, + "step": 26317, + "teacher_loss": 0.16436265408992767 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.44042664766311646, + "learning_rate": 3.1091214201506015e-07, + "loss": 0.2201, + "step": 26318, + "teacher_loss": 0.19556373357772827 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.532873272895813, + "learning_rate": 3.1045227537587984e-07, + "loss": 0.2351, + "step": 26319, + "teacher_loss": 0.20199242234230042 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.5981099605560303, + "learning_rate": 3.099927455239876e-07, + "loss": 0.2278, + "step": 26320, + "teacher_loss": 0.18663232028484344 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.29750481247901917, + "learning_rate": 3.0953355246991663e-07, + "loss": 0.2348, + "step": 26321, + "teacher_loss": 0.22787630558013916 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.11789701879024506, + "learning_rate": 3.0907469622420024e-07, + "loss": 0.1564, + "step": 26322, + "teacher_loss": 0.16062262654304504 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.21908657252788544, + "learning_rate": 3.08616176797355e-07, + "loss": 0.1993, + "step": 26323, + "teacher_loss": 0.19707436859607697 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.3766312599182129, + "learning_rate": 3.081579941998908e-07, + "loss": 0.192, + "step": 26324, + "teacher_loss": 0.17143815755844116 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.5554318428039551, + "learning_rate": 3.077001484423175e-07, + "loss": 0.259, + "step": 26325, + "teacher_loss": 0.22609932720661163 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.33158108592033386, + "learning_rate": 3.072426395351302e-07, + "loss": 0.1857, + "step": 26326, + "teacher_loss": 0.16948994994163513 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.30557170510292053, + "learning_rate": 3.0678546748881544e-07, + "loss": 0.2006, + "step": 26327, + "teacher_loss": 0.18898791074752808 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.48600152134895325, + "learning_rate": 3.063286323138598e-07, + "loss": 0.2139, + "step": 26328, + "teacher_loss": 0.18365202844142914 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.5222252011299133, + "learning_rate": 3.058721340207349e-07, + "loss": 0.215, + "step": 26329, + "teacher_loss": 0.1808834820985794 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.5014474391937256, + "learning_rate": 3.054159726199057e-07, + "loss": 0.2845, + "step": 26330, + "teacher_loss": 0.2604144811630249 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.4072541296482086, + "learning_rate": 3.049601481218306e-07, + "loss": 0.2692, + "step": 26331, + "teacher_loss": 0.2538619041442871 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.5288593769073486, + "learning_rate": 3.045046605369628e-07, + "loss": 0.2626, + "step": 26332, + "teacher_loss": 0.2330707609653473 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.486158549785614, + "learning_rate": 3.0404950987574566e-07, + "loss": 0.2574, + "step": 26333, + "teacher_loss": 0.23201040923595428 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.4799298644065857, + "learning_rate": 3.0359469614860745e-07, + "loss": 0.1861, + "step": 26334, + "teacher_loss": 0.15346582233905792 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.1772279143333435, + "learning_rate": 3.031402193659849e-07, + "loss": 0.1796, + "step": 26335, + "teacher_loss": 0.17987008392810822 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.3721959888935089, + "learning_rate": 3.0268607953829297e-07, + "loss": 0.1887, + "step": 26336, + "teacher_loss": 0.16830384731292725 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.5456550717353821, + "learning_rate": 3.022322766759417e-07, + "loss": 0.2174, + "step": 26337, + "teacher_loss": 0.1809774935245514 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.3592498302459717, + "learning_rate": 3.01778810789341e-07, + "loss": 0.1746, + "step": 26338, + "teacher_loss": 0.1540893018245697 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.3503061532974243, + "learning_rate": 3.0132568188888433e-07, + "loss": 0.1834, + "step": 26339, + "teacher_loss": 0.1648324429988861 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.6267522573471069, + "learning_rate": 3.0087288998496e-07, + "loss": 0.5017, + "step": 26340, + "teacher_loss": 0.4878247082233429 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.25746411085128784, + "learning_rate": 3.004204350879497e-07, + "loss": 0.2292, + "step": 26341, + "teacher_loss": 0.22608506679534912 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.3029187023639679, + "learning_rate": 2.999683172082301e-07, + "loss": 0.2235, + "step": 26342, + "teacher_loss": 0.2146538347005844 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.32246890664100647, + "learning_rate": 2.9951653635616137e-07, + "loss": 0.2214, + "step": 26343, + "teacher_loss": 0.2101290076971054 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.19092437624931335, + "learning_rate": 2.9906509254210344e-07, + "loss": 0.1965, + "step": 26344, + "teacher_loss": 0.19709143042564392 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.367572158575058, + "learning_rate": 2.986139857764097e-07, + "loss": 0.2081, + "step": 26345, + "teacher_loss": 0.19033510982990265 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.5233211517333984, + "learning_rate": 2.981632160694187e-07, + "loss": 0.2731, + "step": 26346, + "teacher_loss": 0.24531182646751404 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.35632947087287903, + "learning_rate": 2.9771278343146526e-07, + "loss": 0.1611, + "step": 26347, + "teacher_loss": 0.13943755626678467 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.14506271481513977, + "learning_rate": 2.972626878728812e-07, + "loss": 0.1811, + "step": 26348, + "teacher_loss": 0.18512314558029175 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.2981428802013397, + "learning_rate": 2.9681292940398173e-07, + "loss": 0.2308, + "step": 26349, + "teacher_loss": 0.2233605831861496 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.8253594636917114, + "learning_rate": 2.963635080350785e-07, + "loss": 0.2061, + "step": 26350, + "teacher_loss": 0.1373479813337326 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.338609516620636, + "learning_rate": 2.9591442377647496e-07, + "loss": 0.2608, + "step": 26351, + "teacher_loss": 0.2521321773529053 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.4732528328895569, + "learning_rate": 2.954656766384711e-07, + "loss": 0.2211, + "step": 26352, + "teacher_loss": 0.19309645891189575 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.43659302592277527, + "learning_rate": 2.950172666313522e-07, + "loss": 0.2237, + "step": 26353, + "teacher_loss": 0.20003335177898407 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.4235661029815674, + "learning_rate": 2.9456919376539825e-07, + "loss": 0.2094, + "step": 26354, + "teacher_loss": 0.1856456696987152 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.3486536741256714, + "learning_rate": 2.9412145805088443e-07, + "loss": 0.216, + "step": 26355, + "teacher_loss": 0.20124486088752747 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.24341291189193726, + "learning_rate": 2.9367405949807413e-07, + "loss": 0.1615, + "step": 26356, + "teacher_loss": 0.15240870416164398 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.7425260543823242, + "learning_rate": 2.932269981172275e-07, + "loss": 0.3513, + "step": 26357, + "teacher_loss": 0.307780921459198 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.2214815616607666, + "learning_rate": 2.927802739185914e-07, + "loss": 0.2012, + "step": 26358, + "teacher_loss": 0.1988985240459442 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.3996084928512573, + "learning_rate": 2.9233388691240927e-07, + "loss": 0.248, + "step": 26359, + "teacher_loss": 0.23118874430656433 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.3956166207790375, + "learning_rate": 2.918878371089162e-07, + "loss": 0.1943, + "step": 26360, + "teacher_loss": 0.17198149859905243 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.3624839782714844, + "learning_rate": 2.914421245183374e-07, + "loss": 0.1648, + "step": 26361, + "teacher_loss": 0.14287975430488586 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.3230903446674347, + "learning_rate": 2.9099674915089136e-07, + "loss": 0.1902, + "step": 26362, + "teacher_loss": 0.1754865050315857 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.29984021186828613, + "learning_rate": 2.905517110167899e-07, + "loss": 0.1526, + "step": 26363, + "teacher_loss": 0.13621768355369568 + }, + { + "compression_loss": 0.0, + "epoch": 4.76, + "label_loss": 0.4239395260810852, + "learning_rate": 2.9010701012623655e-07, + "loss": 0.3026, + "step": 26364, + "teacher_loss": 0.2891601324081421 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.5674530267715454, + "learning_rate": 2.896626464894281e-07, + "loss": 0.2767, + "step": 26365, + "teacher_loss": 0.24441049993038177 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.44827908277511597, + "learning_rate": 2.8921862011654974e-07, + "loss": 0.217, + "step": 26366, + "teacher_loss": 0.1913110762834549 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.4237147569656372, + "learning_rate": 2.8877493101778505e-07, + "loss": 0.2401, + "step": 26367, + "teacher_loss": 0.21964557468891144 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.27484995126724243, + "learning_rate": 2.883315792033042e-07, + "loss": 0.1901, + "step": 26368, + "teacher_loss": 0.18069766461849213 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.4476448595523834, + "learning_rate": 2.878885646832707e-07, + "loss": 0.2332, + "step": 26369, + "teacher_loss": 0.20936471223831177 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.4599837064743042, + "learning_rate": 2.874458874678432e-07, + "loss": 0.1811, + "step": 26370, + "teacher_loss": 0.15007565915584564 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.31840455532073975, + "learning_rate": 2.870035475671734e-07, + "loss": 0.2986, + "step": 26371, + "teacher_loss": 0.2964409291744232 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.5051140785217285, + "learning_rate": 2.865615449913983e-07, + "loss": 0.2456, + "step": 26372, + "teacher_loss": 0.21673625707626343 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.49418067932128906, + "learning_rate": 2.8611987975065643e-07, + "loss": 0.2602, + "step": 26373, + "teacher_loss": 0.2341851145029068 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.39917486906051636, + "learning_rate": 2.856785518550681e-07, + "loss": 0.2121, + "step": 26374, + "teacher_loss": 0.19130854308605194 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.6162400245666504, + "learning_rate": 2.852375613147551e-07, + "loss": 0.2296, + "step": 26375, + "teacher_loss": 0.18666520714759827 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.20338264107704163, + "learning_rate": 2.847969081398294e-07, + "loss": 0.2139, + "step": 26376, + "teacher_loss": 0.21512313187122345 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.24729056656360626, + "learning_rate": 2.8435659234039127e-07, + "loss": 0.1405, + "step": 26377, + "teacher_loss": 0.12858551740646362 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.35191404819488525, + "learning_rate": 2.839166139265359e-07, + "loss": 0.1491, + "step": 26378, + "teacher_loss": 0.12652051448822021 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.34629392623901367, + "learning_rate": 2.834769729083536e-07, + "loss": 0.1931, + "step": 26379, + "teacher_loss": 0.17610520124435425 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.38561904430389404, + "learning_rate": 2.830376692959197e-07, + "loss": 0.1867, + "step": 26380, + "teacher_loss": 0.16459277272224426 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.28386735916137695, + "learning_rate": 2.8259870309930933e-07, + "loss": 0.2168, + "step": 26381, + "teacher_loss": 0.2093191146850586 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.24095045030117035, + "learning_rate": 2.8216007432858625e-07, + "loss": 0.1971, + "step": 26382, + "teacher_loss": 0.1922469586133957 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.2198868691921234, + "learning_rate": 2.8172178299380567e-07, + "loss": 0.1287, + "step": 26383, + "teacher_loss": 0.11860324442386627 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.5486941337585449, + "learning_rate": 2.8128382910501626e-07, + "loss": 0.234, + "step": 26384, + "teacher_loss": 0.1990022361278534 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.24689486622810364, + "learning_rate": 2.8084621267226e-07, + "loss": 0.1895, + "step": 26385, + "teacher_loss": 0.18308451771736145 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.12628792226314545, + "learning_rate": 2.804089337055704e-07, + "loss": 0.1404, + "step": 26386, + "teacher_loss": 0.14197522401809692 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.5132458209991455, + "learning_rate": 2.7997199221497126e-07, + "loss": 0.2338, + "step": 26387, + "teacher_loss": 0.20276933908462524 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 1.0073460340499878, + "learning_rate": 2.7953538821048284e-07, + "loss": 0.4536, + "step": 26388, + "teacher_loss": 0.39209312200546265 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.45065838098526, + "learning_rate": 2.7909912170211217e-07, + "loss": 0.2622, + "step": 26389, + "teacher_loss": 0.24121883511543274 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.6563032865524292, + "learning_rate": 2.786631926998645e-07, + "loss": 0.1917, + "step": 26390, + "teacher_loss": 0.14005742967128754 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.21098321676254272, + "learning_rate": 2.7822760121373193e-07, + "loss": 0.2678, + "step": 26391, + "teacher_loss": 0.27416369318962097 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.3288878798484802, + "learning_rate": 2.7779234725370304e-07, + "loss": 0.3039, + "step": 26392, + "teacher_loss": 0.3010862171649933 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.3994484841823578, + "learning_rate": 2.7735743082975494e-07, + "loss": 0.1921, + "step": 26393, + "teacher_loss": 0.1691109538078308 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.6256210803985596, + "learning_rate": 2.769228519518613e-07, + "loss": 0.2387, + "step": 26394, + "teacher_loss": 0.19569721817970276 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.250484824180603, + "learning_rate": 2.764886106299841e-07, + "loss": 0.1844, + "step": 26395, + "teacher_loss": 0.17708684504032135 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.3137631118297577, + "learning_rate": 2.760547068740804e-07, + "loss": 0.1524, + "step": 26396, + "teacher_loss": 0.13451674580574036 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.18493443727493286, + "learning_rate": 2.756211406940956e-07, + "loss": 0.1588, + "step": 26397, + "teacher_loss": 0.15591105818748474 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.5682260990142822, + "learning_rate": 2.7518791209997505e-07, + "loss": 0.1838, + "step": 26398, + "teacher_loss": 0.14107362926006317 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.6092061400413513, + "learning_rate": 2.747550211016475e-07, + "loss": 0.268, + "step": 26399, + "teacher_loss": 0.23008251190185547 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.4932122826576233, + "learning_rate": 2.7432246770903835e-07, + "loss": 0.207, + "step": 26400, + "teacher_loss": 0.1751624494791031 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.3317599296569824, + "learning_rate": 2.7389025193206297e-07, + "loss": 0.1604, + "step": 26401, + "teacher_loss": 0.14130732417106628 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.33530908823013306, + "learning_rate": 2.734583737806368e-07, + "loss": 0.3463, + "step": 26402, + "teacher_loss": 0.34751635789871216 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.2231946736574173, + "learning_rate": 2.730268332646552e-07, + "loss": 0.1852, + "step": 26403, + "teacher_loss": 0.18094466626644135 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.6410014629364014, + "learning_rate": 2.725956303940136e-07, + "loss": 0.2475, + "step": 26404, + "teacher_loss": 0.2037343978881836 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.18390896916389465, + "learning_rate": 2.7216476517860245e-07, + "loss": 0.1635, + "step": 26405, + "teacher_loss": 0.16128680109977722 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.4148280918598175, + "learning_rate": 2.717342376282955e-07, + "loss": 0.1901, + "step": 26406, + "teacher_loss": 0.16518238186836243 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.21636715531349182, + "learning_rate": 2.7130404775296323e-07, + "loss": 0.1077, + "step": 26407, + "teacher_loss": 0.09557149559259415 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.5851198434829712, + "learning_rate": 2.708741955624727e-07, + "loss": 0.2315, + "step": 26408, + "teacher_loss": 0.19225898385047913 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.47046583890914917, + "learning_rate": 2.7044468106667607e-07, + "loss": 0.1835, + "step": 26409, + "teacher_loss": 0.15158924460411072 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.4412728548049927, + "learning_rate": 2.7001550427541877e-07, + "loss": 0.1856, + "step": 26410, + "teacher_loss": 0.15719375014305115 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.27848392724990845, + "learning_rate": 2.6958666519854626e-07, + "loss": 0.1596, + "step": 26411, + "teacher_loss": 0.1463763415813446 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.28710201382637024, + "learning_rate": 2.6915816384588566e-07, + "loss": 0.2505, + "step": 26412, + "teacher_loss": 0.24644553661346436 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.25141000747680664, + "learning_rate": 2.687300002272641e-07, + "loss": 0.2122, + "step": 26413, + "teacher_loss": 0.20789504051208496 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.49327850341796875, + "learning_rate": 2.683021743524955e-07, + "loss": 0.2618, + "step": 26414, + "teacher_loss": 0.23603224754333496 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.37122592329978943, + "learning_rate": 2.6787468623139186e-07, + "loss": 0.1799, + "step": 26415, + "teacher_loss": 0.15866342186927795 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.30191725492477417, + "learning_rate": 2.6744753587375216e-07, + "loss": 0.1692, + "step": 26416, + "teacher_loss": 0.15440186858177185 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.38406866788864136, + "learning_rate": 2.670207232893684e-07, + "loss": 0.2757, + "step": 26417, + "teacher_loss": 0.26361894607543945 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.558273196220398, + "learning_rate": 2.665942484880296e-07, + "loss": 0.2015, + "step": 26418, + "teacher_loss": 0.16183751821517944 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.33274343609809875, + "learning_rate": 2.6616811147951116e-07, + "loss": 0.1573, + "step": 26419, + "teacher_loss": 0.1377846896648407 + }, + { + "compression_loss": 0.0, + "epoch": 4.77, + "label_loss": 0.5601588487625122, + "learning_rate": 2.657423122735836e-07, + "loss": 0.1978, + "step": 26420, + "teacher_loss": 0.15754413604736328 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.7319083213806152, + "learning_rate": 2.653168508800091e-07, + "loss": 0.2985, + "step": 26421, + "teacher_loss": 0.2503550052642822 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.3340081572532654, + "learning_rate": 2.648917273085416e-07, + "loss": 0.1891, + "step": 26422, + "teacher_loss": 0.1730543076992035 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.10499230027198792, + "learning_rate": 2.644669415689299e-07, + "loss": 0.1734, + "step": 26423, + "teacher_loss": 0.1809711456298828 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.3077067732810974, + "learning_rate": 2.6404249367091126e-07, + "loss": 0.1808, + "step": 26424, + "teacher_loss": 0.166713684797287 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.4672786593437195, + "learning_rate": 2.6361838362421784e-07, + "loss": 0.2314, + "step": 26425, + "teacher_loss": 0.20522728562355042 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.4785950183868408, + "learning_rate": 2.631946114385719e-07, + "loss": 0.1805, + "step": 26426, + "teacher_loss": 0.14734497666358948 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.643587052822113, + "learning_rate": 2.627711771236907e-07, + "loss": 0.2314, + "step": 26427, + "teacher_loss": 0.1855829656124115 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.18949052691459656, + "learning_rate": 2.6234808068928305e-07, + "loss": 0.1623, + "step": 26428, + "teacher_loss": 0.15924076735973358 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.31570878624916077, + "learning_rate": 2.619253221450479e-07, + "loss": 0.2534, + "step": 26429, + "teacher_loss": 0.24648021161556244 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.7868441343307495, + "learning_rate": 2.6150290150067593e-07, + "loss": 0.2048, + "step": 26430, + "teacher_loss": 0.1401652991771698 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.5456027984619141, + "learning_rate": 2.6108081876585755e-07, + "loss": 0.1727, + "step": 26431, + "teacher_loss": 0.1312258541584015 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.28667229413986206, + "learning_rate": 2.606590739502634e-07, + "loss": 0.1831, + "step": 26432, + "teacher_loss": 0.1716083437204361 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.16899427771568298, + "learning_rate": 2.6023766706356756e-07, + "loss": 0.1447, + "step": 26433, + "teacher_loss": 0.1420421004295349 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.37226301431655884, + "learning_rate": 2.598165981154288e-07, + "loss": 0.1579, + "step": 26434, + "teacher_loss": 0.13403035700321198 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.44914135336875916, + "learning_rate": 2.593958671155028e-07, + "loss": 0.2185, + "step": 26435, + "teacher_loss": 0.19283771514892578 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.21628999710083008, + "learning_rate": 2.5897547407343516e-07, + "loss": 0.1639, + "step": 26436, + "teacher_loss": 0.1580483615398407 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.2801150977611542, + "learning_rate": 2.5855541899886314e-07, + "loss": 0.2193, + "step": 26437, + "teacher_loss": 0.212594673037529 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.8119360208511353, + "learning_rate": 2.5813570190141747e-07, + "loss": 0.4668, + "step": 26438, + "teacher_loss": 0.42847010493278503 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.4729951322078705, + "learning_rate": 2.5771632279072363e-07, + "loss": 0.2072, + "step": 26439, + "teacher_loss": 0.17761777341365814 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.22920605540275574, + "learning_rate": 2.5729728167639413e-07, + "loss": 0.1669, + "step": 26440, + "teacher_loss": 0.159982830286026 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.8074127435684204, + "learning_rate": 2.5687857856803775e-07, + "loss": 0.3316, + "step": 26441, + "teacher_loss": 0.27872878313064575 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.20560744404792786, + "learning_rate": 2.564602134752536e-07, + "loss": 0.1814, + "step": 26442, + "teacher_loss": 0.17868748307228088 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.430389404296875, + "learning_rate": 2.560421864076307e-07, + "loss": 0.2, + "step": 26443, + "teacher_loss": 0.17438830435276031 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.3647538125514984, + "learning_rate": 2.556244973747579e-07, + "loss": 0.1727, + "step": 26444, + "teacher_loss": 0.15137921273708344 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 1.0523293018341064, + "learning_rate": 2.552071463862093e-07, + "loss": 0.31, + "step": 26445, + "teacher_loss": 0.2275182604789734 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.6857502460479736, + "learning_rate": 2.54790133451554e-07, + "loss": 0.2519, + "step": 26446, + "teacher_loss": 0.20367178320884705 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.16118204593658447, + "learning_rate": 2.5437345858035253e-07, + "loss": 0.173, + "step": 26447, + "teacher_loss": 0.1743564009666443 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.3464326858520508, + "learning_rate": 2.539571217821557e-07, + "loss": 0.1638, + "step": 26448, + "teacher_loss": 0.14353898167610168 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.28996506333351135, + "learning_rate": 2.5354112306651414e-07, + "loss": 0.2213, + "step": 26449, + "teacher_loss": 0.2136838138103485 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.17318224906921387, + "learning_rate": 2.531254624429602e-07, + "loss": 0.165, + "step": 26450, + "teacher_loss": 0.16404922306537628 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.40535157918930054, + "learning_rate": 2.5271013992102796e-07, + "loss": 0.2462, + "step": 26451, + "teacher_loss": 0.228561669588089 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.33080509305000305, + "learning_rate": 2.522951555102365e-07, + "loss": 0.2033, + "step": 26452, + "teacher_loss": 0.18909047544002533 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.3085571527481079, + "learning_rate": 2.518805092201015e-07, + "loss": 0.184, + "step": 26453, + "teacher_loss": 0.1702013909816742 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.14445427060127258, + "learning_rate": 2.5146620106012706e-07, + "loss": 0.1399, + "step": 26454, + "teacher_loss": 0.1394350528717041 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.5394058227539062, + "learning_rate": 2.5105223103981723e-07, + "loss": 0.341, + "step": 26455, + "teacher_loss": 0.31900209188461304 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.29711222648620605, + "learning_rate": 2.5063859916866107e-07, + "loss": 0.2425, + "step": 26456, + "teacher_loss": 0.23638153076171875 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.2803683876991272, + "learning_rate": 2.50225305456136e-07, + "loss": 0.1703, + "step": 26457, + "teacher_loss": 0.158098965883255 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.35664355754852295, + "learning_rate": 2.498123499117261e-07, + "loss": 0.1625, + "step": 26458, + "teacher_loss": 0.14097297191619873 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.5782793760299683, + "learning_rate": 2.493997325448971e-07, + "loss": 0.2614, + "step": 26459, + "teacher_loss": 0.22613874077796936 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.4967818856239319, + "learning_rate": 2.489874533651032e-07, + "loss": 0.1739, + "step": 26460, + "teacher_loss": 0.1380581110715866 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.9243016242980957, + "learning_rate": 2.4857551238180507e-07, + "loss": 0.3001, + "step": 26461, + "teacher_loss": 0.23076987266540527 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.5367703437805176, + "learning_rate": 2.4816390960444193e-07, + "loss": 0.2458, + "step": 26462, + "teacher_loss": 0.2134486436843872 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.481407105922699, + "learning_rate": 2.477526450424511e-07, + "loss": 0.2575, + "step": 26463, + "teacher_loss": 0.23266759514808655 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.3602825403213501, + "learning_rate": 2.4734171870526343e-07, + "loss": 0.2303, + "step": 26464, + "teacher_loss": 0.21589820086956024 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.13266390562057495, + "learning_rate": 2.469311306022998e-07, + "loss": 0.1847, + "step": 26465, + "teacher_loss": 0.1905362606048584 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.4018552899360657, + "learning_rate": 2.4652088074297254e-07, + "loss": 0.1965, + "step": 26466, + "teacher_loss": 0.17366188764572144 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.16853779554367065, + "learning_rate": 2.461109691366892e-07, + "loss": 0.1851, + "step": 26467, + "teacher_loss": 0.18689538538455963 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.20183219015598297, + "learning_rate": 2.457013957928472e-07, + "loss": 0.134, + "step": 26468, + "teacher_loss": 0.1264268010854721 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.801593005657196, + "learning_rate": 2.4529216072083583e-07, + "loss": 0.2409, + "step": 26469, + "teacher_loss": 0.17864912748336792 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.23200616240501404, + "learning_rate": 2.448832639300358e-07, + "loss": 0.1947, + "step": 26470, + "teacher_loss": 0.19052262604236603 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.5465121269226074, + "learning_rate": 2.4447470542982797e-07, + "loss": 0.2173, + "step": 26471, + "teacher_loss": 0.18067757785320282 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.7589600086212158, + "learning_rate": 2.440664852295749e-07, + "loss": 0.2681, + "step": 26472, + "teacher_loss": 0.21354196965694427 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.6373481750488281, + "learning_rate": 2.4365860333863733e-07, + "loss": 0.2424, + "step": 26473, + "teacher_loss": 0.19853463768959045 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.2517188787460327, + "learning_rate": 2.4325105976636453e-07, + "loss": 0.2081, + "step": 26474, + "teacher_loss": 0.20324605703353882 + }, + { + "compression_loss": 0.0, + "epoch": 4.78, + "label_loss": 0.3956500291824341, + "learning_rate": 2.4284385452210235e-07, + "loss": 0.2211, + "step": 26475, + "teacher_loss": 0.20167234539985657 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.45357558131217957, + "learning_rate": 2.424369876151866e-07, + "loss": 0.2909, + "step": 26476, + "teacher_loss": 0.2727966904640198 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.49215489625930786, + "learning_rate": 2.4203045905494494e-07, + "loss": 0.1995, + "step": 26477, + "teacher_loss": 0.16700318455696106 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.44279059767723083, + "learning_rate": 2.416242688506998e-07, + "loss": 0.2294, + "step": 26478, + "teacher_loss": 0.2057175189256668 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.24307036399841309, + "learning_rate": 2.412184170117604e-07, + "loss": 0.16, + "step": 26479, + "teacher_loss": 0.150734543800354 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.7411606311798096, + "learning_rate": 2.4081290354743437e-07, + "loss": 0.2158, + "step": 26480, + "teacher_loss": 0.1573847532272339 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.36068233847618103, + "learning_rate": 2.4040772846701753e-07, + "loss": 0.1761, + "step": 26481, + "teacher_loss": 0.15562735497951508 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.2730235755443573, + "learning_rate": 2.4000289177980086e-07, + "loss": 0.151, + "step": 26482, + "teacher_loss": 0.13745997846126556 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.6067962646484375, + "learning_rate": 2.395983934950652e-07, + "loss": 0.2142, + "step": 26483, + "teacher_loss": 0.1705491542816162 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.5053513050079346, + "learning_rate": 2.3919423362208314e-07, + "loss": 0.244, + "step": 26484, + "teacher_loss": 0.21493184566497803 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.2645168900489807, + "learning_rate": 2.3879041217012233e-07, + "loss": 0.1899, + "step": 26485, + "teacher_loss": 0.18162211775779724 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.6817524433135986, + "learning_rate": 2.3838692914844195e-07, + "loss": 0.2229, + "step": 26486, + "teacher_loss": 0.1719537377357483 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.41481921076774597, + "learning_rate": 2.3798378456628965e-07, + "loss": 0.2059, + "step": 26487, + "teacher_loss": 0.18273188173770905 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.24145865440368652, + "learning_rate": 2.3758097843291138e-07, + "loss": 0.1735, + "step": 26488, + "teacher_loss": 0.16597720980644226 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.3588376045227051, + "learning_rate": 2.3717851075754305e-07, + "loss": 0.1788, + "step": 26489, + "teacher_loss": 0.15881605446338654 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.3366454243659973, + "learning_rate": 2.3677638154940895e-07, + "loss": 0.1678, + "step": 26490, + "teacher_loss": 0.14909303188323975 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.5983163118362427, + "learning_rate": 2.3637459081773005e-07, + "loss": 0.1556, + "step": 26491, + "teacher_loss": 0.10645299404859543 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.33436524868011475, + "learning_rate": 2.3597313857171732e-07, + "loss": 0.152, + "step": 26492, + "teacher_loss": 0.13169236481189728 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.4331894814968109, + "learning_rate": 2.3557202482057671e-07, + "loss": 0.2406, + "step": 26493, + "teacher_loss": 0.21915876865386963 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.3451586365699768, + "learning_rate": 2.3517124957350257e-07, + "loss": 0.2379, + "step": 26494, + "teacher_loss": 0.22603173553943634 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.36440956592559814, + "learning_rate": 2.3477081283968582e-07, + "loss": 0.1448, + "step": 26495, + "teacher_loss": 0.12035196274518967 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.39625445008277893, + "learning_rate": 2.3437071462830585e-07, + "loss": 0.2064, + "step": 26496, + "teacher_loss": 0.18532824516296387 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.2373029589653015, + "learning_rate": 2.339709549485336e-07, + "loss": 0.2252, + "step": 26497, + "teacher_loss": 0.2238886058330536 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.27212581038475037, + "learning_rate": 2.3357153380953845e-07, + "loss": 0.1484, + "step": 26498, + "teacher_loss": 0.13464315235614777 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.4619075059890747, + "learning_rate": 2.331724512204747e-07, + "loss": 0.1848, + "step": 26499, + "teacher_loss": 0.15403109788894653 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.2452515959739685, + "learning_rate": 2.327737071904934e-07, + "loss": 0.1512, + "step": 26500, + "teacher_loss": 0.1407509446144104 + }, + { + "epoch": 4.79, + "eval_exact_match": 80.66225165562913, + "eval_f1": 87.93879348439872, + "step": 26500 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.5733667612075806, + "learning_rate": 2.3237530172873722e-07, + "loss": 0.2678, + "step": 26501, + "teacher_loss": 0.23388376832008362 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.7125282287597656, + "learning_rate": 2.3197723484434054e-07, + "loss": 0.283, + "step": 26502, + "teacher_loss": 0.23530232906341553 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.2018968164920807, + "learning_rate": 2.3157950654642767e-07, + "loss": 0.1515, + "step": 26503, + "teacher_loss": 0.14585034549236298 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.38363999128341675, + "learning_rate": 2.3118211684411972e-07, + "loss": 0.2264, + "step": 26504, + "teacher_loss": 0.20890435576438904 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.26954373717308044, + "learning_rate": 2.30785065746526e-07, + "loss": 0.1929, + "step": 26505, + "teacher_loss": 0.18433818221092224 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.5033186674118042, + "learning_rate": 2.30388353262751e-07, + "loss": 0.2231, + "step": 26506, + "teacher_loss": 0.19196519255638123 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.21033942699432373, + "learning_rate": 2.2999197940188732e-07, + "loss": 0.2462, + "step": 26507, + "teacher_loss": 0.25014275312423706 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.2414764165878296, + "learning_rate": 2.2959594417302776e-07, + "loss": 0.2291, + "step": 26508, + "teacher_loss": 0.22773322463035583 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.5087376236915588, + "learning_rate": 2.2920024758524837e-07, + "loss": 0.1813, + "step": 26509, + "teacher_loss": 0.14496511220932007 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.24111220240592957, + "learning_rate": 2.2880488964762192e-07, + "loss": 0.1185, + "step": 26510, + "teacher_loss": 0.1048772931098938 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.6570134162902832, + "learning_rate": 2.2840987036921112e-07, + "loss": 0.2597, + "step": 26511, + "teacher_loss": 0.21555306017398834 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.5416872501373291, + "learning_rate": 2.280151897590771e-07, + "loss": 0.2234, + "step": 26512, + "teacher_loss": 0.18802523612976074 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.4544157385826111, + "learning_rate": 2.2762084782626425e-07, + "loss": 0.1993, + "step": 26513, + "teacher_loss": 0.1710004210472107 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.3629343509674072, + "learning_rate": 2.2722684457981702e-07, + "loss": 0.2457, + "step": 26514, + "teacher_loss": 0.23268982768058777 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.3244015872478485, + "learning_rate": 2.2683318002876653e-07, + "loss": 0.1966, + "step": 26515, + "teacher_loss": 0.1823570877313614 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.6716141700744629, + "learning_rate": 2.2643985418213885e-07, + "loss": 0.2243, + "step": 26516, + "teacher_loss": 0.17457574605941772 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.5024648904800415, + "learning_rate": 2.2604686704895184e-07, + "loss": 0.2098, + "step": 26517, + "teacher_loss": 0.17724426090717316 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.5091278553009033, + "learning_rate": 2.2565421863821488e-07, + "loss": 0.1863, + "step": 26518, + "teacher_loss": 0.15045419335365295 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.26302212476730347, + "learning_rate": 2.2526190895893252e-07, + "loss": 0.2046, + "step": 26519, + "teacher_loss": 0.19807936251163483 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.37165072560310364, + "learning_rate": 2.2486993802009582e-07, + "loss": 0.2328, + "step": 26520, + "teacher_loss": 0.21733957529067993 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.42701277136802673, + "learning_rate": 2.244783058306943e-07, + "loss": 0.2234, + "step": 26521, + "teacher_loss": 0.20080187916755676 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.2805967926979065, + "learning_rate": 2.2408701239970741e-07, + "loss": 0.1739, + "step": 26522, + "teacher_loss": 0.1620882749557495 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.5354892611503601, + "learning_rate": 2.23696057736103e-07, + "loss": 0.3474, + "step": 26523, + "teacher_loss": 0.3264472782611847 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.5913339853286743, + "learning_rate": 2.233054418488456e-07, + "loss": 0.2342, + "step": 26524, + "teacher_loss": 0.19456440210342407 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.6555658578872681, + "learning_rate": 2.22915164746893e-07, + "loss": 0.2652, + "step": 26525, + "teacher_loss": 0.22181519865989685 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.8262344598770142, + "learning_rate": 2.225252264391914e-07, + "loss": 0.3093, + "step": 26526, + "teacher_loss": 0.2518256604671478 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.4635121822357178, + "learning_rate": 2.2213562693468026e-07, + "loss": 0.3697, + "step": 26527, + "teacher_loss": 0.35927560925483704 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.323217511177063, + "learning_rate": 2.2174636624229416e-07, + "loss": 0.2255, + "step": 26528, + "teacher_loss": 0.21462669968605042 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.10991115868091583, + "learning_rate": 2.2135744437095595e-07, + "loss": 0.1601, + "step": 26529, + "teacher_loss": 0.16572898626327515 + }, + { + "compression_loss": 0.0, + "epoch": 4.79, + "label_loss": 0.4349479079246521, + "learning_rate": 2.2096886132958184e-07, + "loss": 0.2743, + "step": 26530, + "teacher_loss": 0.2564626634120941 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.5304349064826965, + "learning_rate": 2.2058061712708466e-07, + "loss": 0.4099, + "step": 26531, + "teacher_loss": 0.39650243520736694 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.7071031332015991, + "learning_rate": 2.2019271177236067e-07, + "loss": 0.2578, + "step": 26532, + "teacher_loss": 0.20789135992527008 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.23349101841449738, + "learning_rate": 2.1980514527430606e-07, + "loss": 0.1776, + "step": 26533, + "teacher_loss": 0.17133569717407227 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 1.0195116996765137, + "learning_rate": 2.1941791764180542e-07, + "loss": 0.9554, + "step": 26534, + "teacher_loss": 0.9482549428939819 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.42511966824531555, + "learning_rate": 2.1903102888373993e-07, + "loss": 0.2786, + "step": 26535, + "teacher_loss": 0.262276828289032 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.4225679039955139, + "learning_rate": 2.186444790089742e-07, + "loss": 0.1886, + "step": 26536, + "teacher_loss": 0.16264456510543823 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.24601562321186066, + "learning_rate": 2.1825826802637617e-07, + "loss": 0.1944, + "step": 26537, + "teacher_loss": 0.18864095211029053 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.5502891540527344, + "learning_rate": 2.1787239594479702e-07, + "loss": 0.2373, + "step": 26538, + "teacher_loss": 0.20250718295574188 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.3730257749557495, + "learning_rate": 2.1748686277308472e-07, + "loss": 0.2807, + "step": 26539, + "teacher_loss": 0.2704831063747406 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.34546980261802673, + "learning_rate": 2.171016685200772e-07, + "loss": 0.2523, + "step": 26540, + "teacher_loss": 0.2419959306716919 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.23065312206745148, + "learning_rate": 2.16716813194609e-07, + "loss": 0.175, + "step": 26541, + "teacher_loss": 0.16879487037658691 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.27378618717193604, + "learning_rate": 2.163322968055015e-07, + "loss": 0.2371, + "step": 26542, + "teacher_loss": 0.23298847675323486 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.352588415145874, + "learning_rate": 2.1594811936156923e-07, + "loss": 0.1551, + "step": 26543, + "teacher_loss": 0.13316959142684937 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.5132936239242554, + "learning_rate": 2.155642808716235e-07, + "loss": 0.2136, + "step": 26544, + "teacher_loss": 0.18031004071235657 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.21607151627540588, + "learning_rate": 2.151807813444606e-07, + "loss": 0.1608, + "step": 26545, + "teacher_loss": 0.15461014211177826 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.2549065053462982, + "learning_rate": 2.1479762078887687e-07, + "loss": 0.1343, + "step": 26546, + "teacher_loss": 0.12088148295879364 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.5719956755638123, + "learning_rate": 2.1441479921365526e-07, + "loss": 0.2295, + "step": 26547, + "teacher_loss": 0.19141870737075806 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.320910781621933, + "learning_rate": 2.1403231662757206e-07, + "loss": 0.2137, + "step": 26548, + "teacher_loss": 0.20182648301124573 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.3157101273536682, + "learning_rate": 2.136501730393986e-07, + "loss": 0.2345, + "step": 26549, + "teacher_loss": 0.22548353672027588 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.663744330406189, + "learning_rate": 2.132683684578929e-07, + "loss": 0.2917, + "step": 26550, + "teacher_loss": 0.25032591819763184 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.6276815533638, + "learning_rate": 2.128869028918129e-07, + "loss": 0.2062, + "step": 26551, + "teacher_loss": 0.15932975709438324 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.3351443409919739, + "learning_rate": 2.1250577634989999e-07, + "loss": 0.2057, + "step": 26552, + "teacher_loss": 0.19129127264022827 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.2985061705112457, + "learning_rate": 2.121249888408955e-07, + "loss": 0.1619, + "step": 26553, + "teacher_loss": 0.14672841131687164 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.23061443865299225, + "learning_rate": 2.1174454037352908e-07, + "loss": 0.1337, + "step": 26554, + "teacher_loss": 0.12288016080856323 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.3937075436115265, + "learning_rate": 2.1136443095652213e-07, + "loss": 0.1821, + "step": 26555, + "teacher_loss": 0.1586090326309204 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.25787076354026794, + "learning_rate": 2.10984660598591e-07, + "loss": 0.1911, + "step": 26556, + "teacher_loss": 0.18364334106445312 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.2706703841686249, + "learning_rate": 2.1060522930844039e-07, + "loss": 0.1944, + "step": 26557, + "teacher_loss": 0.18589171767234802 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.5510338544845581, + "learning_rate": 2.1022613709477167e-07, + "loss": 0.1931, + "step": 26558, + "teacher_loss": 0.15334567427635193 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.3310496211051941, + "learning_rate": 2.0984738396627623e-07, + "loss": 0.2061, + "step": 26559, + "teacher_loss": 0.19226107001304626 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.43700751662254333, + "learning_rate": 2.0946896993163545e-07, + "loss": 0.247, + "step": 26560, + "teacher_loss": 0.2259148210287094 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.4481600522994995, + "learning_rate": 2.0909089499952737e-07, + "loss": 0.2181, + "step": 26561, + "teacher_loss": 0.19256016612052917 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.36073723435401917, + "learning_rate": 2.0871315917862177e-07, + "loss": 0.1654, + "step": 26562, + "teacher_loss": 0.14373403787612915 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.35423365235328674, + "learning_rate": 2.0833576247757502e-07, + "loss": 0.1731, + "step": 26563, + "teacher_loss": 0.1529546082019806 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.4972625970840454, + "learning_rate": 2.079587049050402e-07, + "loss": 0.2428, + "step": 26564, + "teacher_loss": 0.21449661254882812 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.6349788904190063, + "learning_rate": 2.0758198646966708e-07, + "loss": 0.2157, + "step": 26565, + "teacher_loss": 0.1691613495349884 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.18579766154289246, + "learning_rate": 2.072056071800854e-07, + "loss": 0.2132, + "step": 26566, + "teacher_loss": 0.21627789735794067 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.41962379217147827, + "learning_rate": 2.0682956704492993e-07, + "loss": 0.2324, + "step": 26567, + "teacher_loss": 0.21155965328216553 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.4972507357597351, + "learning_rate": 2.0645386607282048e-07, + "loss": 0.1863, + "step": 26568, + "teacher_loss": 0.1517573744058609 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.5470048785209656, + "learning_rate": 2.0607850427237006e-07, + "loss": 0.2697, + "step": 26569, + "teacher_loss": 0.23885944485664368 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.40810444951057434, + "learning_rate": 2.0570348165218355e-07, + "loss": 0.2274, + "step": 26570, + "teacher_loss": 0.2073659598827362 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.42419108748435974, + "learning_rate": 2.0532879822086237e-07, + "loss": 0.2191, + "step": 26571, + "teacher_loss": 0.19627100229263306 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.646525502204895, + "learning_rate": 2.049544539869963e-07, + "loss": 0.2571, + "step": 26572, + "teacher_loss": 0.21382969617843628 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.3486188054084778, + "learning_rate": 2.0458044895916516e-07, + "loss": 0.2363, + "step": 26573, + "teacher_loss": 0.22386500239372253 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.3252182602882385, + "learning_rate": 2.0420678314594542e-07, + "loss": 0.2471, + "step": 26574, + "teacher_loss": 0.23841966688632965 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.30904310941696167, + "learning_rate": 2.0383345655590358e-07, + "loss": 0.1908, + "step": 26575, + "teacher_loss": 0.17765100300312042 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.43673503398895264, + "learning_rate": 2.0346046919759942e-07, + "loss": 0.2027, + "step": 26576, + "teacher_loss": 0.17672871053218842 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.7287284135818481, + "learning_rate": 2.0308782107958445e-07, + "loss": 0.2266, + "step": 26577, + "teacher_loss": 0.17080318927764893 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.11723659932613373, + "learning_rate": 2.0271551221040352e-07, + "loss": 0.1334, + "step": 26578, + "teacher_loss": 0.13521084189414978 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.2978176474571228, + "learning_rate": 2.0234354259859144e-07, + "loss": 0.2185, + "step": 26579, + "teacher_loss": 0.20963937044143677 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.4201890826225281, + "learning_rate": 2.0197191225267308e-07, + "loss": 0.258, + "step": 26580, + "teacher_loss": 0.23993264138698578 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.5311610698699951, + "learning_rate": 2.0160062118117496e-07, + "loss": 0.2511, + "step": 26581, + "teacher_loss": 0.22003470361232758 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.6326667070388794, + "learning_rate": 2.012296693926069e-07, + "loss": 0.2509, + "step": 26582, + "teacher_loss": 0.20848089456558228 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.6323836445808411, + "learning_rate": 2.0085905689547212e-07, + "loss": 0.2972, + "step": 26583, + "teacher_loss": 0.25994256138801575 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.5764419436454773, + "learning_rate": 2.0048878369826884e-07, + "loss": 0.1992, + "step": 26584, + "teacher_loss": 0.15727734565734863 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.25306063890457153, + "learning_rate": 2.0011884980948858e-07, + "loss": 0.1361, + "step": 26585, + "teacher_loss": 0.12312394380569458 + }, + { + "compression_loss": 0.0, + "epoch": 4.8, + "label_loss": 0.342345654964447, + "learning_rate": 1.9974925523760957e-07, + "loss": 0.2002, + "step": 26586, + "teacher_loss": 0.1844092756509781 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.9469391107559204, + "learning_rate": 1.9937999999110502e-07, + "loss": 0.3288, + "step": 26587, + "teacher_loss": 0.26008307933807373 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.40020638704299927, + "learning_rate": 1.9901108407844483e-07, + "loss": 0.1876, + "step": 26588, + "teacher_loss": 0.1639908254146576 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.23812930285930634, + "learning_rate": 1.9864250750808387e-07, + "loss": 0.1519, + "step": 26589, + "teacher_loss": 0.14229774475097656 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.3367200195789337, + "learning_rate": 1.9827427028847212e-07, + "loss": 0.2074, + "step": 26590, + "teacher_loss": 0.19302572309970856 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.5522750020027161, + "learning_rate": 1.979063724280561e-07, + "loss": 0.2354, + "step": 26591, + "teacher_loss": 0.20022502541542053 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.2836771607398987, + "learning_rate": 1.9753881393526574e-07, + "loss": 0.1721, + "step": 26592, + "teacher_loss": 0.15974655747413635 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 1.2989203929901123, + "learning_rate": 1.9717159481853097e-07, + "loss": 0.4299, + "step": 26593, + "teacher_loss": 0.3332933783531189 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.40130162239074707, + "learning_rate": 1.9680471508627173e-07, + "loss": 0.2785, + "step": 26594, + "teacher_loss": 0.2648827135562897 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.5575899481773376, + "learning_rate": 1.9643817474689795e-07, + "loss": 0.2638, + "step": 26595, + "teacher_loss": 0.2311980426311493 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.3221670389175415, + "learning_rate": 1.9607197380881127e-07, + "loss": 0.1929, + "step": 26596, + "teacher_loss": 0.17848925292491913 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.9010970592498779, + "learning_rate": 1.9570611228041158e-07, + "loss": 0.2609, + "step": 26597, + "teacher_loss": 0.18973296880722046 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.35315263271331787, + "learning_rate": 1.9534059017008554e-07, + "loss": 0.1966, + "step": 26598, + "teacher_loss": 0.17917457222938538 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.25410693883895874, + "learning_rate": 1.9497540748621145e-07, + "loss": 0.1369, + "step": 26599, + "teacher_loss": 0.12386883050203323 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.5609424114227295, + "learning_rate": 1.946105642371626e-07, + "loss": 0.2188, + "step": 26600, + "teacher_loss": 0.1808059811592102 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.309104323387146, + "learning_rate": 1.9424606043130733e-07, + "loss": 0.163, + "step": 26601, + "teacher_loss": 0.14675165712833405 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.06861025094985962, + "learning_rate": 1.938818960769989e-07, + "loss": 0.1344, + "step": 26602, + "teacher_loss": 0.1417549103498459 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.21902698278427124, + "learning_rate": 1.9351807118258734e-07, + "loss": 0.2169, + "step": 26603, + "teacher_loss": 0.21667072176933289 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.3767809271812439, + "learning_rate": 1.931545857564143e-07, + "loss": 0.2217, + "step": 26604, + "teacher_loss": 0.20444533228874207 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.3183096945285797, + "learning_rate": 1.9279143980681312e-07, + "loss": 0.1932, + "step": 26605, + "teacher_loss": 0.17925378680229187 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.4314734637737274, + "learning_rate": 1.9242863334211213e-07, + "loss": 0.2083, + "step": 26606, + "teacher_loss": 0.18349778652191162 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.6829096674919128, + "learning_rate": 1.9206616637062468e-07, + "loss": 0.2428, + "step": 26607, + "teacher_loss": 0.19393488764762878 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.6199914216995239, + "learning_rate": 1.9170403890066412e-07, + "loss": 0.2224, + "step": 26608, + "teacher_loss": 0.178226500749588 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.3411809504032135, + "learning_rate": 1.9134225094053215e-07, + "loss": 0.2127, + "step": 26609, + "teacher_loss": 0.19847092032432556 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.41728919744491577, + "learning_rate": 1.9098080249852379e-07, + "loss": 0.306, + "step": 26610, + "teacher_loss": 0.2935827672481537 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.5277500152587891, + "learning_rate": 1.9061969358292574e-07, + "loss": 0.2272, + "step": 26611, + "teacher_loss": 0.19379499554634094 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.1921905130147934, + "learning_rate": 1.9025892420201807e-07, + "loss": 0.1696, + "step": 26612, + "teacher_loss": 0.16709628701210022 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.32954180240631104, + "learning_rate": 1.8989849436407082e-07, + "loss": 0.3565, + "step": 26613, + "teacher_loss": 0.359546959400177 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.5536991953849792, + "learning_rate": 1.89538404077349e-07, + "loss": 0.2744, + "step": 26614, + "teacher_loss": 0.24337589740753174 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.7158051133155823, + "learning_rate": 1.891786533501061e-07, + "loss": 0.2979, + "step": 26615, + "teacher_loss": 0.2514929473400116 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.45972874760627747, + "learning_rate": 1.888192421905921e-07, + "loss": 0.2135, + "step": 26616, + "teacher_loss": 0.18612083792686462 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.36339297890663147, + "learning_rate": 1.8846017060704712e-07, + "loss": 0.208, + "step": 26617, + "teacher_loss": 0.1907695084810257 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.6676833629608154, + "learning_rate": 1.881014386077029e-07, + "loss": 0.271, + "step": 26618, + "teacher_loss": 0.22695128619670868 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.15162141621112823, + "learning_rate": 1.8774304620078453e-07, + "loss": 0.1368, + "step": 26619, + "teacher_loss": 0.13515296578407288 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.1612195074558258, + "learning_rate": 1.8738499339450709e-07, + "loss": 0.1828, + "step": 26620, + "teacher_loss": 0.18524453043937683 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.22180190682411194, + "learning_rate": 1.8702728019708237e-07, + "loss": 0.2077, + "step": 26621, + "teacher_loss": 0.20616436004638672 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.7415960431098938, + "learning_rate": 1.8666990661671214e-07, + "loss": 0.2228, + "step": 26622, + "teacher_loss": 0.16519324481487274 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.33962446451187134, + "learning_rate": 1.8631287266158649e-07, + "loss": 0.2096, + "step": 26623, + "teacher_loss": 0.1951010823249817 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.557237982749939, + "learning_rate": 1.859561783398922e-07, + "loss": 0.2383, + "step": 26624, + "teacher_loss": 0.2028655707836151 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.15978658199310303, + "learning_rate": 1.8559982365980943e-07, + "loss": 0.167, + "step": 26625, + "teacher_loss": 0.16779188811779022 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.24664750695228577, + "learning_rate": 1.8524380862950663e-07, + "loss": 0.1499, + "step": 26626, + "teacher_loss": 0.13915738463401794 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.24275988340377808, + "learning_rate": 1.8488813325714558e-07, + "loss": 0.1498, + "step": 26627, + "teacher_loss": 0.13952095806598663 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.34714895486831665, + "learning_rate": 1.845327975508815e-07, + "loss": 0.1662, + "step": 26628, + "teacher_loss": 0.14604011178016663 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.2680235207080841, + "learning_rate": 1.8417780151886275e-07, + "loss": 0.1744, + "step": 26629, + "teacher_loss": 0.16394749283790588 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.17373903095722198, + "learning_rate": 1.838231451692246e-07, + "loss": 0.2282, + "step": 26630, + "teacher_loss": 0.23427070677280426 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.5175390243530273, + "learning_rate": 1.8346882851010383e-07, + "loss": 0.1862, + "step": 26631, + "teacher_loss": 0.14937494695186615 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.3268944323062897, + "learning_rate": 1.8311485154961892e-07, + "loss": 0.2037, + "step": 26632, + "teacher_loss": 0.19006142020225525 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.34421366453170776, + "learning_rate": 1.827612142958851e-07, + "loss": 0.1783, + "step": 26633, + "teacher_loss": 0.15990768373012543 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.5900821685791016, + "learning_rate": 1.824079167570142e-07, + "loss": 0.1903, + "step": 26634, + "teacher_loss": 0.14584311842918396 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.21630124747753143, + "learning_rate": 1.820549589411047e-07, + "loss": 0.1252, + "step": 26635, + "teacher_loss": 0.11506427079439163 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.4203346371650696, + "learning_rate": 1.817023408562485e-07, + "loss": 0.199, + "step": 26636, + "teacher_loss": 0.17444080114364624 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.708977460861206, + "learning_rate": 1.813500625105291e-07, + "loss": 0.2219, + "step": 26637, + "teacher_loss": 0.1677657663822174 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 1.1075928211212158, + "learning_rate": 1.8099812391202508e-07, + "loss": 0.3448, + "step": 26638, + "teacher_loss": 0.2600868344306946 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.15651877224445343, + "learning_rate": 1.806465250688033e-07, + "loss": 0.1213, + "step": 26639, + "teacher_loss": 0.11742302775382996 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.4752455949783325, + "learning_rate": 1.8029526598892565e-07, + "loss": 0.2591, + "step": 26640, + "teacher_loss": 0.23503944277763367 + }, + { + "compression_loss": 0.0, + "epoch": 4.81, + "label_loss": 0.2704382538795471, + "learning_rate": 1.7994434668044735e-07, + "loss": 0.2266, + "step": 26641, + "teacher_loss": 0.22171147167682648 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.3054630756378174, + "learning_rate": 1.7959376715141195e-07, + "loss": 0.1703, + "step": 26642, + "teacher_loss": 0.15529881417751312 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.31029456853866577, + "learning_rate": 1.792435274098564e-07, + "loss": 0.1749, + "step": 26643, + "teacher_loss": 0.15984606742858887 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.5667407512664795, + "learning_rate": 1.788936274638142e-07, + "loss": 0.245, + "step": 26644, + "teacher_loss": 0.20928195118904114 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.23693910241127014, + "learning_rate": 1.7854406732130402e-07, + "loss": 0.1826, + "step": 26645, + "teacher_loss": 0.1765764355659485 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.19595250487327576, + "learning_rate": 1.7819484699034106e-07, + "loss": 0.1442, + "step": 26646, + "teacher_loss": 0.13843604922294617 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.37882000207901, + "learning_rate": 1.7784596647893059e-07, + "loss": 0.1858, + "step": 26647, + "teacher_loss": 0.16432680189609528 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.17673683166503906, + "learning_rate": 1.774974257950762e-07, + "loss": 0.1408, + "step": 26648, + "teacher_loss": 0.13685429096221924 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.5808209776878357, + "learning_rate": 1.7714922494676323e-07, + "loss": 0.3513, + "step": 26649, + "teacher_loss": 0.32579556107521057 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.3273374140262604, + "learning_rate": 1.7680136394197689e-07, + "loss": 0.2337, + "step": 26650, + "teacher_loss": 0.22333180904388428 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.6616871356964111, + "learning_rate": 1.764538427886958e-07, + "loss": 0.288, + "step": 26651, + "teacher_loss": 0.24653145670890808 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.27064263820648193, + "learning_rate": 1.7610666149488198e-07, + "loss": 0.2513, + "step": 26652, + "teacher_loss": 0.24920299649238586 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.4270990490913391, + "learning_rate": 1.75759820068499e-07, + "loss": 0.1714, + "step": 26653, + "teacher_loss": 0.14296120405197144 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.4188765287399292, + "learning_rate": 1.7541331851749885e-07, + "loss": 0.2489, + "step": 26654, + "teacher_loss": 0.22999805212020874 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.17144450545310974, + "learning_rate": 1.7506715684982522e-07, + "loss": 0.1775, + "step": 26655, + "teacher_loss": 0.17820894718170166 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.5406796932220459, + "learning_rate": 1.747213350734117e-07, + "loss": 0.2812, + "step": 26656, + "teacher_loss": 0.2523888349533081 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.42695268988609314, + "learning_rate": 1.74375853196192e-07, + "loss": 0.3482, + "step": 26657, + "teacher_loss": 0.3394070863723755 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.4141199588775635, + "learning_rate": 1.740307112260847e-07, + "loss": 0.1921, + "step": 26658, + "teacher_loss": 0.1674213856458664 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.5092183351516724, + "learning_rate": 1.7368590917100025e-07, + "loss": 0.2976, + "step": 26659, + "teacher_loss": 0.27409225702285767 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.4832767844200134, + "learning_rate": 1.7334144703884726e-07, + "loss": 0.1884, + "step": 26660, + "teacher_loss": 0.15559455752372742 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.3427741527557373, + "learning_rate": 1.729973248375244e-07, + "loss": 0.1403, + "step": 26661, + "teacher_loss": 0.11775478720664978 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.4078224301338196, + "learning_rate": 1.7265354257491706e-07, + "loss": 0.3148, + "step": 26662, + "teacher_loss": 0.3044697344303131 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.13483723998069763, + "learning_rate": 1.723101002589089e-07, + "loss": 0.1592, + "step": 26663, + "teacher_loss": 0.16192759573459625 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.2916278541088104, + "learning_rate": 1.7196699789737535e-07, + "loss": 0.2142, + "step": 26664, + "teacher_loss": 0.20564445853233337 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 1.108000636100769, + "learning_rate": 1.7162423549818175e-07, + "loss": 0.2881, + "step": 26665, + "teacher_loss": 0.19698545336723328 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.3294225335121155, + "learning_rate": 1.7128181306918512e-07, + "loss": 0.1887, + "step": 26666, + "teacher_loss": 0.17303289473056793 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.3252015709877014, + "learning_rate": 1.7093973061824087e-07, + "loss": 0.2123, + "step": 26667, + "teacher_loss": 0.1998099386692047 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.2935095429420471, + "learning_rate": 1.7059798815318605e-07, + "loss": 0.1733, + "step": 26668, + "teacher_loss": 0.15995007753372192 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.5394579172134399, + "learning_rate": 1.7025658568185942e-07, + "loss": 0.2341, + "step": 26669, + "teacher_loss": 0.2001829743385315 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.44556427001953125, + "learning_rate": 1.6991552321208804e-07, + "loss": 0.1831, + "step": 26670, + "teacher_loss": 0.15393027663230896 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.664936900138855, + "learning_rate": 1.6957480075168896e-07, + "loss": 0.2308, + "step": 26671, + "teacher_loss": 0.18256829679012299 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.5034996271133423, + "learning_rate": 1.6923441830847765e-07, + "loss": 0.3355, + "step": 26672, + "teacher_loss": 0.3168873190879822 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.291450560092926, + "learning_rate": 1.6889437589025613e-07, + "loss": 0.2241, + "step": 26673, + "teacher_loss": 0.216669499874115 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.9409869909286499, + "learning_rate": 1.685546735048199e-07, + "loss": 0.3544, + "step": 26674, + "teacher_loss": 0.289250910282135 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.31029993295669556, + "learning_rate": 1.682153111599577e-07, + "loss": 0.1713, + "step": 26675, + "teacher_loss": 0.1558365374803543 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.2216094434261322, + "learning_rate": 1.6787628886345162e-07, + "loss": 0.1747, + "step": 26676, + "teacher_loss": 0.16944356262683868 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.2281738817691803, + "learning_rate": 1.6753760662307217e-07, + "loss": 0.1549, + "step": 26677, + "teacher_loss": 0.146717369556427 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.3380930423736572, + "learning_rate": 1.6719926444658472e-07, + "loss": 0.213, + "step": 26678, + "teacher_loss": 0.19913268089294434 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.37729179859161377, + "learning_rate": 1.6686126234174981e-07, + "loss": 0.1871, + "step": 26679, + "teacher_loss": 0.16599278151988983 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.2691551148891449, + "learning_rate": 1.665236003163112e-07, + "loss": 0.19, + "step": 26680, + "teacher_loss": 0.1811722218990326 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.41396981477737427, + "learning_rate": 1.6618627837801437e-07, + "loss": 0.1885, + "step": 26681, + "teacher_loss": 0.1634656935930252 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.20239734649658203, + "learning_rate": 1.6584929653459313e-07, + "loss": 0.1714, + "step": 26682, + "teacher_loss": 0.16795362532138824 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.18324315547943115, + "learning_rate": 1.655126547937713e-07, + "loss": 0.1732, + "step": 26683, + "teacher_loss": 0.1720539927482605 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.2754879891872406, + "learning_rate": 1.6517635316326772e-07, + "loss": 0.1398, + "step": 26684, + "teacher_loss": 0.1247037947177887 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.2101212441921234, + "learning_rate": 1.6484039165079455e-07, + "loss": 0.1703, + "step": 26685, + "teacher_loss": 0.16590210795402527 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.6218718886375427, + "learning_rate": 1.6450477026405232e-07, + "loss": 0.233, + "step": 26686, + "teacher_loss": 0.18975883722305298 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.9356488585472107, + "learning_rate": 1.6416948901073648e-07, + "loss": 0.3311, + "step": 26687, + "teacher_loss": 0.26387500762939453 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.7857467532157898, + "learning_rate": 1.6383454789853425e-07, + "loss": 0.3579, + "step": 26688, + "teacher_loss": 0.3104079067707062 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.15420745313167572, + "learning_rate": 1.634999469351245e-07, + "loss": 0.2001, + "step": 26689, + "teacher_loss": 0.20521730184555054 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.5287690758705139, + "learning_rate": 1.631656861281794e-07, + "loss": 0.1781, + "step": 26690, + "teacher_loss": 0.13909929990768433 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.41596531867980957, + "learning_rate": 1.6283176548536283e-07, + "loss": 0.1924, + "step": 26691, + "teacher_loss": 0.16759786009788513 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.12350337207317352, + "learning_rate": 1.6249818501432866e-07, + "loss": 0.1363, + "step": 26692, + "teacher_loss": 0.1377372443675995 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.24458357691764832, + "learning_rate": 1.621649447227258e-07, + "loss": 0.1729, + "step": 26693, + "teacher_loss": 0.1649162769317627 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.2510814964771271, + "learning_rate": 1.6183204461819478e-07, + "loss": 0.2103, + "step": 26694, + "teacher_loss": 0.20580416917800903 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.6063018441200256, + "learning_rate": 1.6149948470836785e-07, + "loss": 0.2943, + "step": 26695, + "teacher_loss": 0.2595962882041931 + }, + { + "compression_loss": 0.0, + "epoch": 4.82, + "label_loss": 0.33197978138923645, + "learning_rate": 1.6116726500087052e-07, + "loss": 0.1793, + "step": 26696, + "teacher_loss": 0.162329763174057 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.5743714570999146, + "learning_rate": 1.608353855033168e-07, + "loss": 0.2847, + "step": 26697, + "teacher_loss": 0.25247690081596375 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.20145922899246216, + "learning_rate": 1.6050384622331882e-07, + "loss": 0.1412, + "step": 26698, + "teacher_loss": 0.1345311403274536 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.33690956234931946, + "learning_rate": 1.6017264716847557e-07, + "loss": 0.1714, + "step": 26699, + "teacher_loss": 0.15300868451595306 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.44958004355430603, + "learning_rate": 1.5984178834638264e-07, + "loss": 0.2363, + "step": 26700, + "teacher_loss": 0.21259824931621552 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.3916698098182678, + "learning_rate": 1.5951126976462392e-07, + "loss": 0.1956, + "step": 26701, + "teacher_loss": 0.17384150624275208 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.5447748899459839, + "learning_rate": 1.5918109143077842e-07, + "loss": 0.184, + "step": 26702, + "teacher_loss": 0.14386287331581116 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.36514341831207275, + "learning_rate": 1.5885125335241334e-07, + "loss": 0.2097, + "step": 26703, + "teacher_loss": 0.19239550828933716 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.6458698511123657, + "learning_rate": 1.5852175553709436e-07, + "loss": 0.2878, + "step": 26704, + "teacher_loss": 0.24798163771629333 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.2681099772453308, + "learning_rate": 1.581925979923754e-07, + "loss": 0.1616, + "step": 26705, + "teacher_loss": 0.14973688125610352 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.3242238461971283, + "learning_rate": 1.5786378072580044e-07, + "loss": 0.2206, + "step": 26706, + "teacher_loss": 0.20908290147781372 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.2812730669975281, + "learning_rate": 1.575353037449101e-07, + "loss": 0.1751, + "step": 26707, + "teacher_loss": 0.16328656673431396 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.679393470287323, + "learning_rate": 1.572071670572367e-07, + "loss": 0.1939, + "step": 26708, + "teacher_loss": 0.13993003964424133 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.4808571934700012, + "learning_rate": 1.568793706703009e-07, + "loss": 0.1729, + "step": 26709, + "teacher_loss": 0.13870534300804138 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.5350741147994995, + "learning_rate": 1.5655191459161833e-07, + "loss": 0.2422, + "step": 26710, + "teacher_loss": 0.20962196588516235 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.3469955325126648, + "learning_rate": 1.5622479882869968e-07, + "loss": 0.1776, + "step": 26711, + "teacher_loss": 0.1588045358657837 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.23887155950069427, + "learning_rate": 1.5589802338904058e-07, + "loss": 0.1475, + "step": 26712, + "teacher_loss": 0.1373380869626999 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.5115737318992615, + "learning_rate": 1.5557158828013508e-07, + "loss": 0.2444, + "step": 26713, + "teacher_loss": 0.21470607817173004 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.25544148683547974, + "learning_rate": 1.5524549350946716e-07, + "loss": 0.1577, + "step": 26714, + "teacher_loss": 0.14681033790111542 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.34575194120407104, + "learning_rate": 1.549197390845142e-07, + "loss": 0.2106, + "step": 26715, + "teacher_loss": 0.19560036063194275 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.6323339343070984, + "learning_rate": 1.5459432501274184e-07, + "loss": 0.2306, + "step": 26716, + "teacher_loss": 0.18596941232681274 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.4580984115600586, + "learning_rate": 1.5426925130161417e-07, + "loss": 0.2443, + "step": 26717, + "teacher_loss": 0.22054031491279602 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.22569376230239868, + "learning_rate": 1.5394451795858355e-07, + "loss": 0.1348, + "step": 26718, + "teacher_loss": 0.12474530935287476 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.5164756178855896, + "learning_rate": 1.53620124991094e-07, + "loss": 0.2107, + "step": 26719, + "teacher_loss": 0.17669570446014404 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.3976835012435913, + "learning_rate": 1.532960724065813e-07, + "loss": 0.1673, + "step": 26720, + "teacher_loss": 0.14166569709777832 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.5056062936782837, + "learning_rate": 1.529723602124794e-07, + "loss": 0.2076, + "step": 26721, + "teacher_loss": 0.17444908618927002 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.4073508381843567, + "learning_rate": 1.5264898841620577e-07, + "loss": 0.187, + "step": 26722, + "teacher_loss": 0.16257119178771973 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.1556430160999298, + "learning_rate": 1.5232595702517616e-07, + "loss": 0.1709, + "step": 26723, + "teacher_loss": 0.17256800830364227 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.6193772554397583, + "learning_rate": 1.5200326604679794e-07, + "loss": 0.3093, + "step": 26724, + "teacher_loss": 0.2748171091079712 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.669753909111023, + "learning_rate": 1.5168091548846686e-07, + "loss": 0.2303, + "step": 26725, + "teacher_loss": 0.18145973980426788 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.6311920881271362, + "learning_rate": 1.5135890535757535e-07, + "loss": 0.2521, + "step": 26726, + "teacher_loss": 0.20994237065315247 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.33619800209999084, + "learning_rate": 1.5103723566150752e-07, + "loss": 0.1732, + "step": 26727, + "teacher_loss": 0.1551414430141449 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.1990974247455597, + "learning_rate": 1.5071590640763412e-07, + "loss": 0.2076, + "step": 26728, + "teacher_loss": 0.20851171016693115 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.7425979375839233, + "learning_rate": 1.503949176033259e-07, + "loss": 0.2227, + "step": 26729, + "teacher_loss": 0.164947509765625 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.32283148169517517, + "learning_rate": 1.500742692559387e-07, + "loss": 0.2775, + "step": 26730, + "teacher_loss": 0.2724207043647766 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.2430761754512787, + "learning_rate": 1.4975396137282827e-07, + "loss": 0.1839, + "step": 26731, + "teacher_loss": 0.17733968794345856 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.47460079193115234, + "learning_rate": 1.494339939613354e-07, + "loss": 0.2338, + "step": 26732, + "teacher_loss": 0.207024484872818 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.582412838935852, + "learning_rate": 1.491143670287959e-07, + "loss": 0.2134, + "step": 26733, + "teacher_loss": 0.17242959141731262 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.4968038499355316, + "learning_rate": 1.487950805825389e-07, + "loss": 0.2068, + "step": 26734, + "teacher_loss": 0.1745903342962265 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.532997727394104, + "learning_rate": 1.484761346298852e-07, + "loss": 0.2522, + "step": 26735, + "teacher_loss": 0.22101366519927979 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.6254366040229797, + "learning_rate": 1.481575291781473e-07, + "loss": 0.2791, + "step": 26736, + "teacher_loss": 0.24064716696739197 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.4757692217826843, + "learning_rate": 1.4783926423462769e-07, + "loss": 0.248, + "step": 26737, + "teacher_loss": 0.22274474799633026 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.4548686742782593, + "learning_rate": 1.4752133980662553e-07, + "loss": 0.1997, + "step": 26738, + "teacher_loss": 0.17135955393314362 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.529330313205719, + "learning_rate": 1.4720375590142833e-07, + "loss": 0.3162, + "step": 26739, + "teacher_loss": 0.2925126850605011 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.30171215534210205, + "learning_rate": 1.4688651252631857e-07, + "loss": 0.2308, + "step": 26740, + "teacher_loss": 0.22292988002300262 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.4987463355064392, + "learning_rate": 1.465696096885688e-07, + "loss": 0.1903, + "step": 26741, + "teacher_loss": 0.15598398447036743 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.46644288301467896, + "learning_rate": 1.4625304739544487e-07, + "loss": 0.2191, + "step": 26742, + "teacher_loss": 0.1915905922651291 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.24813279509544373, + "learning_rate": 1.4593682565420597e-07, + "loss": 0.1833, + "step": 26743, + "teacher_loss": 0.17605122923851013 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 1.0945628881454468, + "learning_rate": 1.4562094447209962e-07, + "loss": 0.3875, + "step": 26744, + "teacher_loss": 0.30895352363586426 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.5107935667037964, + "learning_rate": 1.4530540385637003e-07, + "loss": 0.2381, + "step": 26745, + "teacher_loss": 0.20775283873081207 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 1.1282711029052734, + "learning_rate": 1.4499020381424978e-07, + "loss": 0.396, + "step": 26746, + "teacher_loss": 0.31461966037750244 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.32896384596824646, + "learning_rate": 1.4467534435296638e-07, + "loss": 0.2452, + "step": 26747, + "teacher_loss": 0.23587128520011902 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.24232040345668793, + "learning_rate": 1.4436082547974073e-07, + "loss": 0.1504, + "step": 26748, + "teacher_loss": 0.14023542404174805 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.3009645342826843, + "learning_rate": 1.4404664720177874e-07, + "loss": 0.1469, + "step": 26749, + "teacher_loss": 0.12974300980567932 + }, + { + "compression_loss": 0.0, + "epoch": 4.83, + "label_loss": 0.19984322786331177, + "learning_rate": 1.4373280952628963e-07, + "loss": 0.1788, + "step": 26750, + "teacher_loss": 0.17648035287857056 + }, + { + "epoch": 4.83, + "eval_exact_match": 80.70009460737937, + "eval_f1": 87.97766082430648, + "step": 26750 + } + ], + "max_steps": 27665, + "num_train_epochs": 5, + "total_flos": 1.997832401702093e+16, + "trial_name": null, + "trial_params": null +}